Add untracked files

2024-08-23 04:01:30 -04:00
parent 1502f6bf38
commit 3098a9dda5
2 changed files with 603 additions and 0 deletions
--- a/workflow/.old/apps/r/calculate_interaction_zscores.R
+++ b/workflow/.old/apps/r/calculate_interaction_zscores.R
@@ -0,0 +1,353 @@
+suppressMessages({
+  if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
+  if (!require("plotly")) stop("Package plotly is required but not installed.")
+  if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
+  if (!require("dplyr")) stop("Package dplyr is required but not installed.")
+  if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
+  if (!require("plyr")) stop("Package plyr is required but not installed.")
+})
+
+# Constants for configuration
+PLOT_WIDTH <- 14
+PLOT_HEIGHT <- 9
+BASE_SIZE <- 14
+
+options(warn = 2, max.print = 100)
+
+parse_arguments <- function() {
+  args <- if (interactive()) {
+    c(
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
+      3,
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
+      "Experiment 1: Doxo versus HLD",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
+      "Experiment 2: HLD versus Doxo"
+    )
+  } else {
+    commandArgs(trailingOnly = TRUE)
+  }
+  
+  paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
+  names <- args[seq(6, length(args), by = 2)]
+  experiments <- setNames(paths, names)
+  
+  list(
+    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
+    sd = as.numeric(args[2]),
+    sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
+    easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
+    experiments = experiments
+  )
+}
+
+args <- parse_arguments()
+
+dir.create(args$out_dir, showWarnings = FALSE)
+
+# Define themes and scales
+theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.line = element_line(colour = "black"),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+}
+
+theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_publication(base_size, base_family) +
+    theme(
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic")
+    )
+}
+
+scale_fill_publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+scale_colour_publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+# Load SGD gene list
+sgd_genes <- function(sgd_gene_list) {
+  read.delim(file = sgd_gene_list, quote = "", header = FALSE,
+             colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
+    dplyr::rename(ORF = V4, GeneName = V5)
+}
+
+genes <- sgd_genes(args$sgd_gene_list)
+
+load_and_preprocess_data <- function(easy_results_file, genes) {
+  df <- tryCatch({
+    read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
+  }, error = function(e) {
+    stop("Error reading file: ", easy_results_file, "\n", e$message)
+  }) %>%
+    filter(!.[[1]] %in% c("", "Scan"))
+  
+  numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
+  df[numeric_columns[numeric_columns %in% colnames(df)]] <- 
+    lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
+  
+  df <- df %>%
+    mutate(
+      L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
+      AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
+      conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
+      delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
+        LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
+      GeneName = vapply(ORF, function(orf) {
+        gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
+        ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
+      }, character(1))
+    )
+  
+  if (nrow(df) == 0) warning("Dataframe is empty after filtering")
+  
+  return(df)
+}
+
+create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
+  plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
+  filtered_df <- filter(df, is.finite(.data[[var]]))
+  
+  p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
+    plot_func(shape = 3, size = 0.2, position = "jitter") +
+    stat_summary(fun = mean, geom = "point", size = 0.6) +
+    stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
+    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
+    theme_publication()
+  
+  pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
+  pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
+  print(p)
+  dev.off()
+  
+  html_path <- sub(".pdf$", ".html", pdf_path)
+  pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
+                            layout(legend = list(orientation = "h")))
+  saveWidget(pgg, html_path, selfcontained = TRUE)
+}
+
+publish_summary_stats <- function(df, variables, out_dir) {
+  stats_list <- lapply(variables, function(var) {
+    df %>%
+      dplyr::group_by(OrfRep, conc_num) %>%
+      dplyr::summarize(
+        N = dplyr::n(),
+        mean_val = mean(.data[[var]], na.rm = TRUE),
+        sd_val = sd(.data[[var]], na.rm = TRUE),
+        se_val = sd_val / sqrt(N)
+      )
+  })
+  summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
+  write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
+}
+
+publish_interaction_scores <- function(df, out_dir) {
+  interaction_scores <- df %>%
+    group_by(OrfRep) %>%
+    summarize(
+      mean_L = mean(L, na.rm = TRUE),
+      mean_K = mean(K, na.rm = TRUE),
+      mean_r = mean(r, na.rm = TRUE),
+      mean_AUC = mean(AUC, na.rm = TRUE),
+      delta_bg_mean = mean(delta_bg, na.rm = TRUE),
+      delta_bg_sd = sd(delta_bg, na.rm = TRUE)
+    ) %>%
+    mutate(
+      l_rank = rank(mean_L),
+      k_rank = rank(mean_K),
+      r_rank = rank(mean_r),
+      auc_rank = rank(mean_AUC)
+    )
+  
+  write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+  write.csv(arrange(interaction_scores, l_rank, k_rank),
+            file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
+}
+
+publish_zscores <- function(df, out_dir) {
+  zscores <- df %>%
+    mutate(
+      zscore_L = scale(L, center = TRUE, scale = TRUE),
+      zscore_K = scale(K, center = TRUE, scale = TRUE),
+      zscore_r = scale(r, center = TRUE, scale = TRUE),
+      zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
+    )
+  
+  write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
+}
+
+generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
+  variables <- c("L", "K", "r", "AUC", "delta_bg")
+  lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
+  delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
+  lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
+         plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
+}
+
+process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
+  out_dir <- file.path(exp_dir, "zscores")
+  out_dir_qc <- file.path(exp_dir, "qc")
+  dir.create(out_dir, showWarnings = FALSE)
+  dir.create(out_dir_qc, showWarnings = FALSE)
+  df <- load_and_preprocess_data(easy_results_file, genes)
+  delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
+  
+  generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
+  
+  variables <- c("L", "K", "r", "AUC", "delta_bg")
+  publish_summary_stats(df, variables, out_dir)
+  publish_interaction_scores(df, out_dir)
+  publish_zscores(df, out_dir)
+  
+  list(
+    zscores_file = file.path(out_dir, "zscores_interaction.csv"),
+    exp_name = exp_name
+  )
+}
+
+processed_experiments <- lapply(names(args$experiments), function(exp_name) {
+  process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
+})
+
+zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
+exp_names <- sapply(processed_experiments, `[[`, "exp_name")
+
+# Sequentially join z-scores similar to original logic
+combine_zscores <- function(zscores_files) {
+  if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
+  
+  if (length(zscores_files) == 2) {
+    X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
+    X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
+    X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
+    
+  } else if (length(zscores_files) == 3) {
+    X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
+    X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
+    X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
+    X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
+    X <- plyr::join(X, X3, by = "OrfRep", type = "full")
+    
+  } else if (length(zscores_files) == 4) {
+    X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
+    X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
+    X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
+    X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
+    X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
+    X <- plyr::join(X, X3, by = "OrfRep", type = "full")
+    X <- plyr::join(X, X4, by = "OrfRep", type = "full")
+  }
+  
+  X
+}
+
+process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
+  OBH <- joined_data[, order(colnames(joined_data))]  # OrderByHeader
+  
+  headSel <- OBH %>%
+    select(contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'), contains('Z_lm_L'), contains('Z_Shift_L')) %>%
+    select(-contains("Gene."))
+  
+  headSel2 <- OBH %>%
+    select(contains('OrfRep'), matches('Gene')) %>%
+    select(-contains("Gene."))
+  
+  # Fill NA values
+  fill_na_in_columns <- function(data) {
+    for (header in colnames(data)) {
+      if (nrow(data) == 0) {
+        warning(paste("Empty data frame for column:", header))
+      }
+      if (grepl("Shift", header)) {
+        data[[header]][is.na(data[[header]])] <- 0.001
+      } else if (grepl("Z_lm_", header)) {
+        data[[header]][is.na(data[[header]])] <- 0.0001
+      }
+    }
+    data
+  }
+  
+  headSel <- fill_na_in_columns(headSel)
+  
+  # Filter by standard deviation
+  filter_by_sd <- function(data, sd) {
+    if (sd == 0) return(data)
+    
+    z_lm_cols <- select(data, contains("z_lm_"))
+    filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
+    data[filter_vector, ]
+  }
+  
+  REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
+  shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
+  
+  # Reorder columns
+  reorder_columns <- function(data1, data2) {
+    combined_data <- data1
+    for (i in 3:ncol(data1)) {
+      if (nrow(data1) == 0 | nrow(data2) == 0) {
+        next
+      }
+      combined_data <- cbind(combined_data, data2[i], data1[i])
+    }
+    combined_data
+  }
+  
+  combI <- reorder_columns(headSel2, shiftOnly)
+  
+  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+  
+  # Relabel headers
+  relabel_headers <- function(headers, exp_names) {
+    new_labels <- headers
+    for (i in seq_along(headers)) {
+      suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
+      if (suffix %in% seq_along(exp_names)) {
+        exp_name <- exp_names[as.numeric(suffix)]
+        new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
+      }
+    }
+    new_labels
+  }
+  
+  colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
+  colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
+  
+  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+}
+
+joined_data <- combine_zscores(zscores_files)
+process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)
--- a/workflow/.old/apps/r/interactions.R
+++ b/workflow/.old/apps/r/interactions.R
@@ -0,0 +1,250 @@
+suppressMessages({
+  library("ggplot2")
+  library("plotly")
+  library("htmlwidgets")
+  library("dplyr")
+  library("ggthemes")
+})
+
+# Constants for configuration
+PLOT_WIDTH <- 14
+PLOT_HEIGHT <- 9
+BASE_SIZE <- 14
+# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date())
+
+options(warn = 2, max.print = 100)
+
+# Parse arguments
+parse_arguments <- function() {
+  if (interactive()) {
+    args <- c(
+      "1",
+      "3",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores"
+    )
+  } else {
+    args <- commandArgs(trailingOnly = TRUE)
+  }
+  list(
+    exp_number = as.numeric(args[1]),
+    delta_bg_factor = as.numeric(args[2]),
+    study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE),
+    sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE),
+    input_file = normalizePath(file.path(args[5]), mustWork = FALSE),
+    out_dir = normalizePath(file.path(args[6]), mustWork = FALSE),
+    out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE)
+  )
+}
+
+args <- parse_arguments()
+dir.create(args$out_dir, showWarnings = FALSE)
+dir.create(args$out_dir_qc, showWarnings = FALSE)
+
+# Define themes and scales
+theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.line = element_line(colour = "black"),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+}
+
+theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_publication(base_size, base_family) +
+    theme(
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic")
+    )
+}
+
+scale_fill_publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+scale_colour_publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+
+# Load and preprocess data
+load_and_preprocess_data <- function(input_file, genes) {
+  df <- tryCatch({
+    read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
+  }, error = function(e) {
+    stop("Error reading file: ", input_file, "\n", e$message)
+  })
+  
+  # Filter out rows where the first column is empty or "Scan"
+  df <- df[!df[[1]] %in% c("", "Scan"), ]
+  
+  # Convert specific columns to numeric
+  numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
+  df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
+  
+  # Handle missing critical columns with warnings
+  df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA}
+  df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA}
+  df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA
+  df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA}
+  
+  # Replace ORF IDs with Gene Names using the SGD gene list
+  df$GeneName <- vapply(df$ORF, function(orf) {
+    gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
+    ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
+  }, character(1))
+
+  if (nrow(df) == 0) warning("Dataframe is empty after filtering")
+  
+  return(df)
+}
+
+
+df <- load_and_preprocess_data(args$input_file)
+
+delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE)
+message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor)
+message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance)
+
+df <- df %>%
+  mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
+    filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
+
+# Create plot
+create_plot <- function(df, var, plot_type) {
+  filtered_df <- df %>% filter(is.finite(.data[[var]]))
+  p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
+    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
+    theme_publication()
+  
+  if (plot_type == "scatter") {
+    p <- p +
+      geom_point(shape = 3, size = 0.2, position = "jitter") +
+      stat_summary(fun = mean, geom = "point", size = 0.6) +
+      stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar")
+  } else {
+    p <- p + geom_boxplot()
+  }
+  
+  return(p)
+}
+
+# Publish plot to PDF and HTML (Plotly)
+publish_plot <- function(plot, plot_path) {
+  # if (file.exists(plot_path)) {
+  #   file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
+  # }
+  
+  pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
+  print(plot)
+  dev.off()
+  
+  pgg <- suppressWarnings(ggplotly(plot,
+    tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>%
+      layout(legend = list(orientation = "h")))
+  saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
+}
+
+# Publish multiple plots
+publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
+  lapply(variables, function(var) {
+    plot <- create_plot(df, var, plot_type)
+    publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf")))
+  })
+}
+
+# Calculate and publish summary statistics
+publish_summary_stats <- function(df, variables, out_dir) {
+  stats_list <- lapply(variables, function(var) {
+    df %>%
+      group_by(OrfRep, conc_num) %>%
+      summarize(
+        N = n(),
+        mean_val = mean(.data[[var]], na.rm = TRUE),
+        sd_val = sd(.data[[var]], na.rm = TRUE),
+        se_val = sd_val / sqrt(N)
+      )
+  })
+  summary_stats_df <- bind_rows(stats_list, .id = "variable")
+  write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
+}
+
+# Calculate and publish interaction scores
+publish_interaction_scores <- function(df, out_dir) {
+  interaction_scores <- df %>%
+    group_by(OrfRep) %>%
+    summarize(
+      mean_L = mean(L, na.rm = TRUE),
+      mean_K = mean(K, na.rm = TRUE),
+      mean_r = mean(r, na.rm = TRUE),
+      mean_AUC = mean(AUC, na.rm = TRUE),
+      delta_bg_mean = mean(delta_bg, na.rm = TRUE),
+      delta_bg_sd = sd(delta_bg, na.rm = TRUE)
+    ) %>%
+    mutate(
+      l_rank = rank(mean_L),
+      k_rank = rank(mean_K),
+      r_rank = rank(mean_r),
+      auc_rank = rank(mean_AUC)
+    )
+  
+  write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+  write.csv(interaction_scores %>%
+    arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
+}
+
+# Publish z-scores
+publish_zscores <- function(df, out_dir) {
+  zscores <- df %>%
+    mutate(
+      zscore_L = scale(L, center = TRUE, scale = TRUE),
+      zscore_K = scale(K, center = TRUE, scale = TRUE),
+      zscore_r = scale(r, center = TRUE, scale = TRUE),
+      zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
+    )
+  
+  write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
+}
+
+# QC plot generation and publication
+generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
+  publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc)
+  
+  delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
+  publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance")
+}
+
+# Run QC plot generation and publication
+generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc)
+
+# Run summary statistics, interaction score, and z-score calculations and publications
+variables <- c("L", "K", "r", "AUC", "delta_bg")
+publish_summary_stats(df, variables, args$out_dir)
+publish_interaction_scores(df, args$out_dir)
+publish_zscores(df, args$out_dir)