Rollup for calculation_interaction_zscores refactor

2024-09-01 03:18:42 -04:00
parent 05cd1a85d8
commit a484a2b104
495 changed files with 412 additions and 6011712 deletions
--- a/workflow/apps/r/calculate_interaction_zscores.R
+++ b/workflow/apps/r/calculate_interaction_zscores.R
@@ -7,13 +7,13 @@ suppressMessages({
  library(data.table)
 })

+options(warn = 2, max.print = 1000)
+
 # Constants for configuration
 PLOT_WIDTH <- 14
 PLOT_HEIGHT <- 9
 BASE_SIZE <- 14

-options(warn = 2, max.print = 100)
-
 parse_arguments <- function() {
  args <- if (interactive()) {
    c(
@@ -29,11 +29,11 @@ parse_arguments <- function() {
  } else {
    commandArgs(trailingOnly = TRUE)
  }
-
+  
  paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
  names <- args[seq(6, length(args), by = 2)]
  experiments <- setNames(paths, names)
-
+  
  list(
    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
    sd = as.numeric(args[2]),
@@ -49,7 +49,7 @@ dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
 dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)

 # Define themes and scales
-theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
+theme_publication <- function(base_size = BASE_SIZE, base_family = "sans", legend_position = "bottom") {
  theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
@@ -64,25 +64,14 @@ theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
      panel.grid.major = element_line(colour = "#f0f0f0"),
      panel.grid.minor = element_blank(),
      legend.key = element_rect(colour = NA),
-      legend.position = "bottom",
-      legend.direction = "horizontal",
+      legend.position = legend_position,
+      legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
      plot.margin = unit(c(10, 5, 5, 5), "mm"),
      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      strip.text = element_text(face = "bold")
    )
 }

-theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
-  theme_publication(base_size, base_family) +
-    theme(
-      legend.position = "right",
-      legend.direction = "vertical",
-      legend.key.size = unit(0.5, "cm"),
-      legend.spacing = unit(0, "cm"),
-      legend.title = element_text(face = "italic")
-    )
-}
-
 scale_fill_publication <- function(...) {
  discrete_scale("fill", "Publication", manual_pal(values = c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
@@ -100,58 +89,91 @@ scale_colour_publication <- function(...) {
 # Load SGD gene list
 sgd_genes <- function(sgd_gene_list) {
  read.delim(file = sgd_gene_list, quote = "", header = FALSE,
-            colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
+             colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
    dplyr::rename(ORF = V4, GeneName = V5)
 }

 genes <- sgd_genes(args$sgd_gene_list)

+# Define the adjust_values function
+adjust_values <- function(x, delta_bg, tolerance) {
+  valid_x <- x[x != 0 & !is.na(x)]
+  max_x <- ifelse(length(valid_x) == 0, NA_real_, max(valid_x, na.rm = TRUE))
+  
+  x_adjusted <- ifelse(x == 0 & !is.na(x), max_x, x)
+  
+  # Adjust only if delta_bg is valid and above tolerance
+  x_adjusted <- ifelse(!is.na(delta_bg) & delta_bg >= tolerance, NA_real_, x_adjusted)
+  
+  return(x_adjusted)
+}
+
+
+
 load_and_preprocess_data <- function(easy_results_file, genes) {
-  df <- tryCatch({
+  df <-
    read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
-  }, error = function(e) {
-    stop("Error reading file: ", easy_results_file, "\n", e$message)
-  })

-  # Convert specific columns to numeric
-  numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
-  df[numeric_columns[numeric_columns %in% colnames(df)]] <- 
-    suppressWarnings(lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric))
-
-  # Apply initial filters and mutating steps as in the original script
+  # Apply further transformations
  df <- df %>%
-    mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>%
-    mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>%
-    filter(!.[[1]] %in% c("", "Scan")) %>%            # Filter out empty or Scan rows
-    filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>%  # Remove blank genes
-    filter(Drug != "BMH21") %>%                       # Filter out specific drugs if necessary
-    filter(!is.na(ORF) & ORF != "") %>%               # Ensure ORF is not NA or empty
-    mutate(OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
-    mutate(conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc))) %>%  # Extract numeric drug concentrations
-    filter(!is.na(LstBackgrd) & !is.na(X1stBackgrd)) %>%  # Ensure finite background values
-    mutate(delta_bg = LstBackgrd - X1stBackgrd) %>%  # Calculate delta background
-    filter(!is.na(L) & !is.na(K) & !is.na(r) & !is.na(AUC96)) %>%  # Ensure finite measurements
-    mutate(GeneName = vapply(ORF, function(orf) {
-      gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
-      ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
-    }, character(1)))
+    filter(
+      !is.na(ORF) & ORF != "",
+      !ORF %in% c("Blank"),
+      !is.na(Gene) & !Gene %in% c("BLANK", "Blank", "blank"),
+      Drug != "BMH21",
+      !is.na(Scan),
+      !is.na(l), !is.na(K),
+      !is.na(r), !is.na(AUC96)
+    ) %>%
+    mutate(
+      Col = suppressWarnings(as.numeric(Col)),
+      Row = suppressWarnings(as.numeric(Row)),
+      L = suppressWarnings(as.numeric(l)),
+      K = suppressWarnings(as.numeric(K)),
+      r = suppressWarnings(as.numeric(r)),
+      Scan = suppressWarnings(as.numeric(Scan)),
+      AUC = suppressWarnings(as.numeric(AUC96)),
+      LstBackgrd = suppressWarnings(as.numeric(LstBackgrd)),
+      X1stBackgrd = suppressWarnings(as.numeric(X1stBackgrd)),
+      delta_bg = ifelse(is.na(LstBackgrd) | is.na(X1stBackgrd), NA_real_, LstBackgrd - X1stBackgrd),
+      delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + 3 * sd(delta_bg, na.rm = TRUE),
+      OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep),
+      OrfRepUnique = paste(OrfRep, Gene, Num., sep = "_"),
+      conc_num = suppressWarnings(as.numeric(gsub("[^0-9\\.]", "", Conc))),
+      DB = as.integer(delta_bg >= delta_bg_tolerance),
+      SM = 0,
+      NG = as.integer(l == 0 & !is.na(l)),
+      L_adjusted = adjust_values(L, delta_bg, delta_bg_tolerance),
+      r_adjusted = adjust_values(r, delta_bg, delta_bg_tolerance),
+      AUC_adjusted = adjust_values(AUC, delta_bg, delta_bg_tolerance),
+      K_adjusted = adjust_values(K, delta_bg, delta_bg_tolerance),
+      max_L_adjusted = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      SM = ifelse(!is.na(max_L_adjusted) & L_adjusted == max_L_adjusted, 1, SM)
+    )

-  if (nrow(df) == 0) warning("Dataframe is empty after filtering")
+  # Efficiently handle the assignment of Gene names
+  gene_map <- setNames(genes$GeneName, genes$ORF)
+  df$Gene <- ifelse(df$OrfRep != "YDL227C", gene_map[df$ORF], df$Gene)
+  df$Gene <- ifelse(df$Gene == "" | df$Gene == "OCT1", df$OrfRep, df$Gene)

  return(df)
 }

-create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
-  if (!("Scan" %in% colnames(df))) {
-    warning("'Scan' column is not present in the data. Skipping this plot.")
-    return(NULL)
-  }

-  plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
-  filtered_df <- df[is.finite(df[[var]]), ]
-  
-  p <- ggplot(filtered_df, aes(Scan, !!sym(var), color = as.factor(conc_num))) +
-    plot_func(shape = 3, size = 0.2, position = "jitter") +
+
+
+generate_plot <- function(df, var, plot_type = "scatter", out_dir_qc, suffix = "") {
+
+  # Filter out non-finite values before plotting
+  df <- df %>%
+    filter(is.finite(!!sym(var)))
+
+  plot_func <- switch(plot_type,
+    scatter = geom_point(shape = 3, size = 0.2, position = "jitter"),
+    boxplot = geom_boxplot())
+
+  p <- ggplot(df, aes(Scan, !!sym(var), color = as.factor(conc_num))) +
+    plot_func +
    stat_summary(fun = mean, geom = "point", size = 0.6) +
    stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
@@ -161,184 +183,233 @@ create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "")
  pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
  print(p)
  dev.off()
-
+  
  html_path <- sub(".pdf$", ".html", pdf_path)
-  pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
+  pgg <- suppressWarnings(ggplotly(p) %>%
    layout(legend = list(orientation = "h")))
  saveWidget(pgg, html_path, selfcontained = TRUE)
 }

-publish_summary_stats <- function(df, variables, out_dir) {
-  stats_list <- lapply(variables, function(var) {
-    df %>%
-      dplyr::group_by(OrfRep, conc_num) %>%
-      dplyr::summarize(
-        N = dplyr::n(),
-        mean_val = mean(.data[[var]], na.rm = TRUE),
-        sd_val = sd(.data[[var]], na.rm = TRUE),
-        se_val = sd_val / sqrt(N)
-      )
-  })
-  summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
-  fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
-}
-
-# Compute Interaction Scores
-compute_interaction_scores <- function(df) {
-  # Calculate raw shifts
-  raw_shifts <- df %>%
-    group_by(OrfRep) %>%
-    reframe(
-      Raw_Shift_L = mean(L, na.rm = TRUE),
-      Raw_Shift_K = mean(K, na.rm = TRUE),
-      Raw_Shift_r = mean(r, na.rm = TRUE),
-      Raw_Shift_AUC = mean(AUC, na.rm = TRUE),
-      NG = n()
-    )
-  
-  # Calculate Z-scores
-  z_scores <- df %>%
-    group_by(OrfRep) %>%
-    reframe(
-      Z_Shift_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Z_Shift_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Z_Shift_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Z_Shift_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
-    )
-  
-  # Linear Model Scores and R-Squared
-  lm_scores <- df %>%
-    group_by(OrfRep) %>%
-    reframe(
-      lm_Score_L = coef(lm(L ~ delta_bg, data = .))[2],
-      R_Squared_L = summary(lm(L ~ delta_bg, data = .))$r.squared,
-      lm_Score_K = coef(lm(K ~ delta_bg, data = .))[2],
-      R_Squared_K = summary(lm(K ~ delta_bg, data = .))$r.squared,
-      lm_Score_r = coef(lm(r ~ delta_bg, data = .))[2],
-      R_Squared_r = summary(lm(r ~ delta_bg, data = .))$r.squared,
-      lm_Score_AUC = coef(lm(AUC ~ delta_bg, data = .))[2],
-      R_Squared_AUC = summary(lm(AUC ~ delta_bg, data = .))$r.squared
-    )
-  
-  # Calculate Sum and Average Z-scores
-  sum_avg_z_scores <- df %>%
-    group_by(OrfRep) %>%
-    reframe(
-      Sum_Z_Score_L = sum(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Avg_Zscore_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Sum_Z_Score_K = sum(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Avg_Zscore_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Sum_Z_Score_r = sum(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Avg_Zscore_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Sum_Z_Score_AUC = sum(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
-      Avg_Zscore_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
-    )
-  
-  # Combine all calculations
-  interaction_scores <- raw_shifts %>%
-    left_join(z_scores, by = "OrfRep") %>%
-    left_join(lm_scores, by = "OrfRep") %>%
-    left_join(sum_avg_z_scores, by = "OrfRep") %>%
-    mutate(
-      l_rank = rank(-Raw_Shift_L),
-      k_rank = rank(-Raw_Shift_K),
-      r_rank = rank(-Raw_Shift_r),
-      auc_rank = rank(-Raw_Shift_AUC)
-    )
-  
-  return(interaction_scores)
-}
-
-
-publish_interaction_scores <- function(df, out_dir) {
-  interaction_scores <- compute_interaction_scores(df)
-  
-  # Additional enhancer and suppressor calculations and outputs
-  deletion_enhancers_L <- interaction_scores[interaction_scores$Raw_Shift_L >= 2, ]
-  deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
-
-  deletion_enhancers_K <- interaction_scores[interaction_scores$Raw_Shift_K <= -2, ]
-  deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
-
-  deletion_suppressors_L <- interaction_scores[interaction_scores$Raw_Shift_L <= -2, ]
-  deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
-
-  deletion_suppressors_K <- interaction_scores[interaction_scores$Raw_Shift_K >= 2, ]
-  deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
-
-  deletion_enhancers_and_suppressors_L <- interaction_scores[
-    interaction_scores$Raw_Shift_L >= 2 | interaction_scores$Raw_Shift_L <= -2, ]
-  deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
-    !is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
-
-  deletion_enhancers_and_suppressors_K <- interaction_scores[
-    interaction_scores$Raw_Shift_K >= 2 | interaction_scores$Raw_Shift_K <= -2, ]
-  deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
-    !is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
-
-  # Write CSV files with updated filenames
-  fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
-  fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank), 
-        file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
-  fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
-  fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
-  fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
-  fwrite(deletion_suppressors_K, file.path(out_dir, "deletion_suppressors_k.csv"), row.names = FALSE)
-  fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
-  fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
-
-  return(interaction_scores)  # Return the updated data frame with all calculated columns
-}
-
-publish_zscores <- function(df, out_dir) {
-  zscores <- df %>%
-    dplyr::mutate(
-      zscore_L = scale(Raw_Shift_L, center = TRUE, scale = TRUE),
-      zscore_K = scale(Raw_Shift_K, center = TRUE, scale = TRUE),
-      zscore_r = scale(Raw_Shift_r, center = TRUE, scale = TRUE),
-      zscore_AUC = scale(Raw_Shift_AUC, center = TRUE, scale = TRUE)
-    )
-
-  fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
-}
-
-
-generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
+generate_and_publish_qc <- function(df, out_dir_qc) {
  variables <- c("L", "K", "r", "AUC", "delta_bg")
  
-  # Pre-QC plots
-  lapply(variables, function(var) {
+  # Access delta_bg_tolerance from the dataframe
+  delta_bg_tolerance <- df$delta_bg_tolerance[1]
+  
+  for (var in variables) {
    if (var %in% colnames(df)) {
-      create_and_publish_plot(df, var, "scatter", out_dir_qc)
+      generate_plot(df, var, "scatter", out_dir_qc)
    }
-  })
+  }

-  # Filter data based on delta background tolerance for Post-QC analysis
  df_post_qc <- df %>%
-    mutate(across(all_of(variables),
-                  ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
+    mutate(across(all_of(variables), ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))

-  # Post-QC plots
-  lapply(variables, function(var) {
+  for (var in variables) {
    if (var %in% colnames(df_post_qc)) {
-      create_and_publish_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
+      generate_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
    }
-  })
+  }

-  # For plots specifically for data above the tolerance threshold
  delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
-  lapply(variables, function(var) {
+  for (var in variables) {
    if (var %in% colnames(delta_bg_above_tolerance)) {
-      create_and_publish_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
+      generate_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
    }
-  })
+  }
+
+  df_finite <- df %>% filter(is.finite(delta_bg))
+  delta_bg_frequency_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
+    geom_density() +
+    ggtitle("Density plot for Delta Background by Conc All Data") +
+    theme_publication(legend_position = "right")
+  
+  delta_bg_bar_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
+    geom_bar() +
+    ggtitle("Bar plot for Delta Background by Conc All Data") +
+    theme_publication(legend_position = "right")
+
+  pdf(file = file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8)
+  print(delta_bg_frequency_plot)
+  print(delta_bg_bar_plot)
+  dev.off()
+  
+  saveWidget(ggplotly(delta_bg_frequency_plot),
+    file = file.path(out_dir_qc, "frequency_delta_background_density.html"), selfcontained = TRUE)
+  saveWidget(ggplotly(delta_bg_bar_plot),
+    file = file.path(out_dir_qc, "frequency_delta_background_bar.html"), selfcontained = TRUE)
+
+  df_no_zero <- df %>% filter(L > 0 & is.finite(L))
+
+  scatter_plots <- list()
+  box_plots <- list()
+  for (var in variables) {
+    if (var %in% colnames(df_no_zero)) {
+      scatter_plots[[var]] <- ggplot(df_no_zero, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
+        geom_point(shape = 3, size = 0.2) +
+        stat_summary(fun = mean, geom = "point", size = 0.6) +
+        stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
+        ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
+        theme_publication()
+
+      box_plots[[var]] <- ggplot(df_no_zero, aes(as.factor(Scan), .data[[var]], color = as.factor(conc_num))) +
+        geom_boxplot() +
+        ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
+        theme_publication()
+    }
+  }
+
+  pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9)
+  lapply(scatter_plots, print)
+  dev.off()
+  
+  pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9)
+  lapply(box_plots, print)
+  dev.off()
+
+  for (var in names(scatter_plots)) {
+    html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_", var, ".html"))
+
+    pgg <- suppressWarnings(ggplotly(scatter_plots[[var]]) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  }
+
+  for (var in names(box_plots)) {
+    html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_boxplots_", var, ".html"))
+
+    pgg <- suppressWarnings(ggplotly(box_plots[[var]]) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  }
 }

-create_rank_plots <- function(interaction_scores, out_dir) {
+compute_experiment_summary_stats <- function(df, out_dir, background_strain = NULL) {
+  if (!is.null(background_strain)) {
+    df <- df %>% filter(OrfRep == background_strain)
+  }
+  
+  summary_stats <- df %>%
+    group_by(OrfRep, conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L, na.rm = TRUE),
+      median_L = median(L, na.rm = TRUE),
+      max_L = max(L, na.rm = TRUE),
+      min_L = min(L, na.rm = TRUE),
+      sd_L = sd(L, na.rm = TRUE),
+      se_L = sd_L / sqrt(N),
+      mean_K = mean(K, na.rm = TRUE),
+      median_K = median(K, na.rm = TRUE),
+      max_K = max(K, na.rm = TRUE),
+      min_K = min(K, na.rm = TRUE),
+      sd_K = sd(K, na.rm = TRUE),
+      se_K = sd_K / sqrt(N),
+      mean_r = mean(r, na.rm = TRUE),
+      median_r = median(r, na.rm = TRUE),
+      max_r = max(r, na.rm = TRUE),
+      min_r = min(r, na.rm = TRUE),
+      sd_r = sd(r, na.rm = TRUE),
+      se_r = sd_r / sqrt(N),
+      mean_AUC = mean(AUC, na.rm = TRUE),
+      median_AUC = median(AUC, na.rm = TRUE),
+      max_AUC = max(AUC, na.rm = TRUE),
+      min_AUC = min(AUC, na.rm = TRUE),
+      sd_AUC = sd(AUC, na.rm = TRUE),
+      se_AUC = sd_AUC / sqrt(N)
+    )
+
+  if (is.null(background_strain)) {
+    fwrite(summary_stats, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
+  } else {
+    fwrite(summary_stats, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE)
+  }
+}
+
+
+filter_and_calculate_sd_k_stats <- function(df, out_dir_qc) {
+  df_cleaned <- df %>%
+    filter(is.finite(L_adjusted) & is.finite(K_adjusted))
+
+  df_within_2SD_K <- df_cleaned %>%
+    filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) <= 2 * sd(K_adjusted, na.rm = TRUE)) %>%
+    group_by(conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L_adjusted, na.rm = TRUE),
+      max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      sd_L = sd(L_adjusted, na.rm = TRUE)
+    )
+  fwrite(df_within_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"))
+
+  df_outside_2SD_K <- df_cleaned %>%
+    filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) > 2 * sd(K_adjusted, na.rm = TRUE)) %>%
+    group_by(conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L_adjusted, na.rm = TRUE),
+      max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      sd_L = sd(L_adjusted, na.rm = TRUE)
+    )
+  fwrite(df_outside_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_outside_2SD_K.csv"))
+
+  p_raw_before <- ggplot(df_cleaned,
+    aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
+    geom_point() +
+    ggtitle("Raw L vs K before QC") +
+    theme_publication(legend_position = "right")
+  
+  pdf(file.path(out_dir_qc, "raw_L_vs_K_before_QC.pdf"))
+  print(p_raw_before)
+  dev.off()
+  
+  saveWidget(ggplotly(p_raw_before), file.path(out_dir_qc, "raw_L_vs_K_before_QC.html"))
+
+  p_above_threshold <- ggplot(df_cleaned %>% filter(delta_bg > delta_bg_tolerance),
+    aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
+    geom_point() +
+    ggtitle("Raw L vs K for strains above delta background threshold") +
+    theme_publication(legend_position = "right")
+  
+  pdf(file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.pdf"))
+  print(p_above_threshold)
+  dev.off()
+  
+  saveWidget(ggplotly(p_above_threshold), file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.html"))
+}
+
+compute_rf_interaction_scores <- function(df_rf, output_dir) {
+  lm_sd_L <- sd(df_rf$lm_Score_L, na.rm = TRUE)
+  lm_sd_K <- sd(df_rf$lm_Score_K, na.rm = TRUE)
+  lm_sd_r <- sd(df_rf$lm_Score_r, na.rm = TRUE)
+  lm_sd_AUC <- sd(df_rf$lm_Score_AUC, na.rm = TRUE)
+  
+  lm_mean_L <- mean(df_rf$lm_Score_L, na.rm = TRUE)
+  lm_mean_K <- mean(df_rf$lm_Score_K, na.rm = TRUE)
+  lm_mean_r <- mean(df_rf$lm_Score_r, na.rm = TRUE)
+  lm_mean_AUC <- mean(df_rf$lm_Score_AUC, na.rm = TRUE)
+  
+  df_rf <- df_rf %>%
+    mutate(
+      Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
+      Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
+      Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
+      Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
+    )
+  
+  df_rf <- df_rf %>%
+      arrange(desc(Z_lm_L), desc(NG))
+  
+  fwrite(df_rf, file.path(output_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+  
+  return(df_rf)
+}
+
+create_rank_plots <- function(interaction_scores_ranks, out_dir) {
  rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
  
  lapply(rank_vars, function(rank_var) {
-    p <- ggplot(interaction_scores, aes(x = !!sym(rank_var))) +
+    p <- ggplot(interaction_scores_ranks, aes(x = !!sym(rank_var))) +
      geom_bar() +
      ggtitle(paste("Rank Distribution for", rank_var)) +
      theme_publication()
@@ -347,8 +418,7 @@ create_rank_plots <- function(interaction_scores, out_dir) {
    pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
    print(p)
    dev.off()
-
-    # Generate HTML output
+    
    html_path <- sub(".pdf$", ".html", pdf_path)
    pgg <- suppressWarnings(ggplotly(p) %>%
      layout(legend = list(orientation = "h")))
@@ -357,11 +427,9 @@ create_rank_plots <- function(interaction_scores, out_dir) {
 }

 create_correlation_plot <- function(interaction_scores, out_dir) {
-  # Check for non-finite values and remove them from the dataset
  interaction_scores <- interaction_scores %>%
    filter_all(all_vars(is.finite(.)))
-
-  # Generate correlation plots for each pair of variables
+  
  pairs <- list(
    c("Raw_Shift_L", "Raw_Shift_K"),
    c("Raw_Shift_L", "Raw_Shift_r"),
@@ -370,7 +438,7 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
    c("Raw_Shift_K", "Raw_Shift_AUC"),
    c("Raw_Shift_r", "Raw_Shift_AUC")
  )
-
+  
  lapply(pairs, function(vars) {
    p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
      geom_point() +
@@ -382,8 +450,7 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
    pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
    print(p)
    dev.off()
-
-    # Generate HTML output
+    
    html_path <- sub(".pdf$", ".html", pdf_path)
    pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
      layout(legend = list(orientation = "h")))
@@ -397,260 +464,124 @@ process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(out_dir_qc, showWarnings = FALSE)

-  # Load and preprocess the data
  data <- load_and_preprocess_data(args$easy_results_file, genes)

-  # Calculate delta background tolerance
-  delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
+  # Generate and publish QC plots
+  generate_and_publish_qc(data, out_dir_qc)

-  # Generate and publish QC plots (both pre-QC and post-QC)
-  generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
+  # Calculate and save summary statistics
+  compute_experiment_summary_stats(data, out_dir)
+  compute_experiment_summary_stats(data, out_dir, background_strain = "YDL227C")
+
+  InteractionScores_RF <- data %>%
+    select(OrfRepUnique) %>%
+    distinct() %>%
+    mutate(
+      Gene = NA,
+      Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA,
+      R_Squared_L = NA, Sum_Z_Score_L = NA, Avg_Zscore_L = NA,
+      Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA,
+      R_Squared_K = NA, Sum_Z_Score_K = NA, Avg_Zscore_K = NA,
+      Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA,
+      R_Squared_r = NA, Sum_Z_Score_r = NA, Avg_Zscore_r = NA,
+      Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA,
+      R_Squared_AUC = NA, Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA,
+      NG = NA, SM = NA
+    )
+
+  for (i in seq_along(unique(InteractionScores_RF$OrfRepUnique))) {
+    Gene_Sel <- unique(InteractionScores_RF$OrfRepUnique)[i]
+    X_Gene_Sel <- data[data$OrfRepUnique == Gene_Sel,]
+    
+    X_stats_interaction <- X_Gene_Sel %>%
+      group_by(OrfRepUnique, Gene) %>%
+      summarize(
+        Raw_Shift_L = mean(L_adjusted, na.rm = TRUE),
+        Raw_Shift_K = mean(K_adjusted, na.rm = TRUE),
+        Raw_Shift_r = mean(r_adjusted, na.rm = TRUE),
+        Raw_Shift_AUC = mean(AUC_adjusted, na.rm = TRUE),
+        Z_Shift_L = mean(scale(L_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_K = mean(scale(K_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_r = mean(scale(r_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_AUC = mean(scale(AUC_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        lm_Score_L = coef(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_L = summary(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_K = coef(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_K = summary(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_r = coef(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_r = summary(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_AUC = coef(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_AUC = summary(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        NG = sum(NG, na.rm = TRUE),
+        DB = sum(DB, na.rm = TRUE),
+        SM = sum(SM, na.rm = TRUE)
+      )
+    
+    InteractionScores_RF <- InteractionScores_RF %>%
+      mutate(
+        Gene = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Gene[1], Gene),
+        Raw_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_L[1], Raw_Shift_L),
+        Z_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_L[1], Z_Shift_L),
+        lm_Score_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_L[1], lm_Score_L),
+        R_Squared_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_L[1], R_Squared_L),
+        Sum_Z_Score_L = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Sum_Z_Score_L),
+        Avg_Zscore_L = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Avg_Zscore_L),
+        Raw_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_K[1], Raw_Shift_K),
+        Z_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_K[1], Z_Shift_K),
+        lm_Score_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_K[1], lm_Score_K),
+        R_Squared_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_K[1], R_Squared_K),
+        Sum_Z_Score_K = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Sum_Z_Score_K),
+        Avg_Zscore_K = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Avg_Zscore_K),
+        Raw_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_r[1], Raw_Shift_r),
+        Z_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_r[1], Z_Shift_r),
+        lm_Score_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_r[1], lm_Score_r),
+        R_Squared_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_r[1], R_Squared_r),
+        Sum_Z_Score_r = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Sum_Z_Score_r),
+        Avg_Zscore_r = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Avg_Zscore_r),
+        Raw_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_AUC[1], Raw_Shift_AUC),
+        Z_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_AUC[1], Z_Shift_AUC),
+        lm_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_AUC[1], lm_Score_AUC),
+        R_Squared_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_AUC[1], R_Squared_AUC),
+        Sum_Z_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Sum_Z_Score_AUC),
+        Avg_Zscore_AUC = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Avg_Zscore_AUC),
+        NG = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$NG[1], NG),
+        DB = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$DB[1], DB),
+        SM = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$SM[1], SM)
+      )
+  }
+
+  fwrite(InteractionScores_RF, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+
+  generate_and_publish_qc(data, out_dir_qc)
+
+  filter_and_calculate_sd_k_stats(data, out_dir_qc)

-  # Process and publish summary stats, interaction scores, and z-scores
  variables <- c("L", "K", "r", "AUC", "delta_bg")
-  publish_summary_stats(data, variables, out_dir)
-  interaction_scores <- publish_interaction_scores(data, out_dir)
-  publish_zscores(interaction_scores, out_dir)  # Now writing interaction_scores, not original data
+  compute_experiment_summary_stats(data, variables, out_dir)
+  compute_experiment_summary_stats(data, variables, out_dir, background_strain = "YDL227C")

-  # Generate rank plots and correlation plots
-  create_rank_plots(interaction_scores, out_dir)
-  create_correlation_plot(interaction_scores, out_dir)
+  compute_rf_interaction_scores(InteractionScores_RF, out_dir)
+
+  publish_scores(InteractionScores_RF, out_dir)
+
+  create_rank_plots(InteractionScores_RF, out_dir)
+  create_correlation_plot(InteractionScores_RF, out_dir)

  output_file <- file.path(out_dir, "zscores_interaction.csv")
-  fwrite(interaction_scores, output_file, row.names = FALSE)  # Write interaction_scores here
+  fwrite(InteractionScores_RF, output_file, row.names = FALSE)

  return(output_file)
 }

-# Process all experiments
 processed_files <- lapply(names(args$experiments), function(exp_name) {
  process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
 })

-# Combine results from all experiments if multiple experiments exist
 if (length(processed_files) > 1) {
  combined_data <- Reduce(function(x, y) {
-    merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
+    merge(fread(x), fread(y), by = "OrfRepUnique", all = TRUE)
  }, processed_files)
-
-combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
-fwrite(combined_data, combined_output_file, row.names = FALSE)
+  
+  combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
+  fwrite(combined_data, combined_output_file, row.names = FALSE)
 }
-
-
-
-
-
-
-
-
-
-
-
-# publish_interaction_scores <- function(df, out_dir) {
-#   interaction_scores <- df %>%
-#     dplyr::group_by(OrfRep) %>%
-#     dplyr::summarize(
-#       mean_L = mean(L, na.rm = TRUE),
-#       mean_K = mean(K, na.rm = TRUE),
-#       mean_r = mean(r, na.rm = TRUE),
-#       mean_AUC = mean(AUC, na.rm = TRUE),
-#       delta_bg_mean = mean(delta_bg, na.rm = TRUE),
-#       delta_bg_sd = sd(delta_bg, na.rm = TRUE)
-#     ) %>%
-#     dplyr::mutate(
-#       l_rank = rank(mean_L),
-#       k_rank = rank(mean_K),
-#       r_rank = rank(mean_r),
-#       auc_rank = rank(mean_AUC)
-#     )
-
-#   fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
-#   fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
-#     file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
-
-#   # Additional enhancer and suppressor calculations and outputs
-#   deletion_enhancers_L <- interaction_scores[interaction_scores$mean_L >= 2, ]
-#   deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
-
-#   deletion_enhancers_K <- interaction_scores[interaction_scores$mean_K <= -2, ]
-#   deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
-
-#   deletion_suppressors_L <- interaction_scores[interaction_scores$mean_L <= -2, ]
-#   deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
-
-#   deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ]
-#   deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
-
-#   deletion_enhancers_and_suppressors_L <- interaction_scores[
-#     interaction_scores$mean_L >= 2 | interaction_scores$mean_L <= -2, ]
-#   deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
-#     !is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
-
-#   deletion_enhancers_and_suppressors_K <- interaction_scores[
-#     interaction_scores$mean_K >= 2 | interaction_scores$mean_K <= -2, ]
-#   deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
-#     !is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
-
-#   # Write CSV files with updated filenames
-#   fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
-#   fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
-#   fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
-#   fwrite(deletion_suppressors_K, file.path(out_dir, "deletion_suppressors_k.csv"), row.names = FALSE)
-#   fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
-#   fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
-
-#   return(interaction_scores)  # Return the updated data frame with rank columns
-# }
-
-
-
-# publish_zscores <- function(df, out_dir) {
-#   zscores <- df %>%
-#     dplyr::mutate(
-#       zscore_L = scale(L, center = TRUE, scale = TRUE),
-#       zscore_K = scale(K, center = TRUE, scale = TRUE),
-#       zscore_r = scale(r, center = TRUE, scale = TRUE),
-#       zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
-#     )
-
-#   fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
-# }
-
-# generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
-#   variables <- c("L", "K", "r", "AUC", "delta_bg")
-  
-#   # Pre-QC plots
-#   lapply(variables, function(var) {
-#     if (var %in% colnames(df)) {
-#       create_and_publish_plot(df, var, "scatter", out_dir_qc)
-#     }
-#   })
-
-#   # Filter data based on delta background tolerance for Post-QC analysis
-#   df_post_qc <- df %>%
-#     mutate(across(all_of(variables), 
-#                   ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
-
-#   # Post-QC plots
-#   lapply(variables, function(var) {
-#     if (var %in% colnames(df_post_qc)) {
-#       create_and_publish_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
-#     }
-#   })
-
-#   # For plots specifically for data above the tolerance threshold
-#   delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
-#   lapply(variables, function(var) {
-#     if (var %in% colnames(delta_bg_above_tolerance)) {
-#       create_and_publish_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
-#     }
-#   })
-# }
-
-
-
-# # Create rank plots
-# create_rank_plots <- function(interaction_scores, out_dir) {
-#   rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
-  
-#   lapply(rank_vars, function(rank_var) {
-#     p <- ggplot(interaction_scores, aes(x = !!sym(rank_var))) +
-#       geom_bar() +
-#       ggtitle(paste("Rank Distribution for", rank_var)) +
-#       theme_publication()
-    
-#     pdf_path <- file.path(out_dir, paste0("rank_distribution_", rank_var, ".pdf"))
-#     pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
-#     print(p)
-#     dev.off()
-
-#     # Generate HTML output
-#     html_path <- sub(".pdf$", ".html", pdf_path)
-#     pgg <- suppressWarnings(ggplotly(p) %>%
-#       layout(legend = list(orientation = "h")))
-#     saveWidget(pgg, html_path, selfcontained = TRUE)
-#   })
-# }
-
-# create_correlation_plot <- function(interaction_scores, out_dir) {
-#   # Check for non-finite values and remove them from the dataset
-#   interaction_scores <- interaction_scores %>%
-#     filter_all(all_vars(is.finite(.)))
-
-#   # Generate correlation plots for each pair of variables
-#   pairs <- list(
-#     c("mean_L", "mean_K"),
-#     c("mean_L", "mean_r"),
-#     c("mean_L", "mean_AUC"),
-#     c("mean_K", "mean_r"),
-#     c("mean_K", "mean_AUC"),
-#     c("mean_r", "mean_AUC")
-#   )
-
-#   lapply(pairs, function(vars) {
-#     p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
-#       geom_point() +
-#       geom_smooth(method = "lm", se = FALSE) +
-#       ggtitle(paste("Correlation between", vars[1], "and", vars[2])) +
-#       theme_publication()
-    
-#     pdf_path <- file.path(out_dir, paste0("correlation_", vars[1], "_", vars[2], ".pdf"))
-#     pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
-#     print(p)
-#     dev.off()
-
-#     # Generate HTML output
-#     html_path <- sub(".pdf$", ".html", pdf_path)
-#     pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
-#       layout(legend = list(orientation = "h")))
-#     saveWidget(pgg, html_path, selfcontained = TRUE)
-#   })
-# }
-
-
-# process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
-#   out_dir <- file.path(exp_dir, "zscores")
-#   out_dir_qc <- file.path(out_dir, "qc")
-#   dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
-#   dir.create(out_dir_qc, showWarnings = FALSE)
-
-#   # Load and preprocess the data
-#   data <- load_and_preprocess_data(args$easy_results_file, genes)
-
-#   # Calculate delta background tolerance
-#   delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
-
-#   # Generate and publish QC plots (both pre-QC and post-QC)
-#   generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
-
-#   # Process and publish summary stats, interaction scores, and z-scores
-#   variables <- c("L", "K", "r", "AUC", "delta_bg")
-#   publish_summary_stats(data, variables, out_dir)
-#   interaction_scores <- publish_interaction_scores(data, out_dir)
-#   publish_zscores(data, out_dir)
-
-#   # Generate rank plots and correlation plots
-#   create_rank_plots(interaction_scores, out_dir)
-#   create_correlation_plot(interaction_scores, out_dir)
-
-#   output_file <- file.path(out_dir, "zscores_interaction.csv")
-#   fwrite(data, output_file, row.names = FALSE)
-
-#   return(output_file)
-# }
-
-# # Process all experiments
-# processed_files <- lapply(names(args$experiments), function(exp_name) {
-#   process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
-# })
-
-# # Combine results from all experiments if multiple experiments exist
-# if (length(processed_files) > 1) {
-#   combined_data <- Reduce(function(x, y) {
-#     merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
-#   }, processed_files)
-
-#   combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
-#   fwrite(combined_data, combined_output_file, row.names = FALSE)
-# }