Before old plotting removal

2024-09-04 02:03:23 -04:00
parent 111909914c
commit d52903f8fa
2 changed files with 327 additions and 282 deletions
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -163,102 +163,6 @@ update_gene_names <- function(df, sgd_gene_list) {
  return(df)
 }
 generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num", title, x_label = NULL, y_label = NULL) {
  # Start the ggplot with just the x_var and color_var
  plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var)))) +
    ggtitle(title) +
    theme_publication()
  # If y_var is not NULL, add it to the plot aesthetics
  if (!is.null(y_var)) {
    plot <- plot + aes(y = !!sym(y_var))
  }
  # Handle different plot types
  plot <- switch(plot_type,
    "scatter" = plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
      stat_summary(fun.data = mean_sdl, geom = "errorbar") +
      stat_summary(fun = mean, geom = "point", size = 0.6),
    "box" = plot + geom_boxplot(),
    "density" = plot + geom_density(),
    "bar" = plot + geom_bar(stat = ifelse(is.null(y_var), "count", "identity"))
  )
  # Add optional labels if provided
  if (!is.null(x_label)) plot <- plot + xlab(x_label)
  if (!is.null(y_label)) plot <- plot + ylab(y_label)
  return(plot)
 }
 generate_and_save_plots <- function(df, output_dir, prefix, variables, include_qc = FALSE) {
  plots <- list()
  if (nrow(df) == 0) {
    message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
    return()
  }
  message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
  for (var in variables) {
    scatter_plot <-
      generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter",
        title = paste(prefix, "Scatter Plot for", var))
    boxplot <-
      generate_plot(df, x_var = "scan", y_var = var, plot_type = "box",
        title = paste(prefix, "Box Plot for", var))
    plots[[paste0(var, "_scatter")]] <- scatter_plot
    plots[[paste0(var, "_box")]] <- boxplot
  }
  if (include_qc) {
    plots[["Raw_L_vs_K"]] <-
      generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter",
        title = "Raw L vs K before QC")
    plots[["Delta_bg_Density"]] <-
      generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num",
        title = "Density plot for Delta Background by Conc All Data")
    plots[["Delta_bg_Bar"]] <-
      generate_plot(df, x_var = "delta_bg", plot_type = "bar",
        title = "Bar plot for Delta Background by Conc All Data")
  }
  save_plots(prefix, plots, output_dir)
 }
 # Ensure all plots are saved and printed to PDF
 save_plots <- function(file_name, plot_list, output_dir) {
  # Save to PDF
  pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
  lapply(plot_list, function(plot) {
    print(plot)
  })
  dev.off()
  # Save to HTML with horizontal legend orientation
  lapply(names(plot_list), function(plot_name) {
    message("Generating plot: ", plot_name)
    pgg <- tryCatch({
      suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
        layout(legend = list(orientation = "h")))
    }, error = function(e) {
      message("Error in plot: ", plot_name, "\n", e)
      return(NULL)
    })
    if (!is.null(pgg)) {
      saveWidget(pgg,
        file = file.path(output_dir,
        paste0(file_name, "_", plot_name, ".html")),
        selfcontained = TRUE)
    }
  })
 }
 # Process strains (deletion and reference)
 process_strains <- function(df) {
  df_strains <- data.frame()  # Initialize an empty dataframe to store results
@@ -295,7 +199,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
      max = ~max(.x, na.rm = TRUE),
      min = ~min(.x, na.rm = TRUE),
      sd = ~sd(.x, na.rm = TRUE),
-      se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1)
+      se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1) # TODO why - 1?
    ), .names = "{.col}_{.fn}"))
  return(summary_stats)
@@ -305,48 +209,70 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
  # Pull the background means
  print("Calculating background means")
-  l_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_mean)
+  L_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_mean)
-  k_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_mean)
+  K_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_mean)
  r_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(r_mean)
-  auc_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_mean)
+  AUC_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_mean)
-  # Calculate all necessary statistics and shifts in one step
+  L_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_sd)
  K_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_sd)
  r_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(r_sd)
  AUC_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_sd)
  # Calculate summary statistics and shifts
  print("Calculating interaction scores part 1")
  interaction_scores_all <- df %>%
    group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
    summarise(
      N = n(),
-      across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
+      L_mean = mean(L),
-      NG = sum(NG, na.rm = TRUE),
+      L_median = median(L),
-      SM = sum(SM, na.rm = TRUE),
+      L_sd = sd(L),
-      raw_shift_l = mean(L, na.rm = TRUE) - l_mean_bg,
+      L_se = L_sd / sqrt(N),
-      raw_shift_k = mean(K, na.rm = TRUE) - k_mean_bg,
+      K_mean = mean(K),
-      raw_shift_r = mean(r, na.rm = TRUE) - r_mean_bg,
+      K_median = median(K),
-      raw_shift_auc = mean(AUC, na.rm = TRUE) - auc_mean_bg,
+      K_sd = sd(K),
-      z_shift_l = raw_shift_l / L_sd[1],
+      K_se = K_sd / sqrt(N),
-      z_shift_k = raw_shift_k / K_sd[1],
+      r_mean = mean(r),
-      z_shift_r = raw_shift_r / r_sd[1],
+      r_median = median(r),
-      z_shift_auc = raw_shift_auc / AUC_sd[1],
+      r_sd = sd(r),
-      exp_l = l_mean_bg + raw_shift_l,
+      r_se = r_sd / sqrt(N),
-      exp_k = k_mean_bg + raw_shift_k,
+      AUC_mean = mean(AUC),
-      exp_r = r_mean_bg + raw_shift_r,
+      AUC_median = median(AUC),
-      exp_auc = auc_mean_bg + raw_shift_auc,
+      AUC_sd = sd(AUC),
-      delta_l = mean(L, na.rm = TRUE) - exp_l,
+      AUC_se = AUC_sd / sqrt(N),
-      delta_k = mean(K, na.rm = TRUE) - exp_k,
+      NG = sum(NG),
-      delta_r = mean(r, na.rm = TRUE) - exp_r,
+      DB = sum(DB),
-      delta_auc = mean(AUC, na.rm = TRUE) - exp_auc,
+      SM = sum(SM),
-      zscore_l = delta_l / L_sd,
+      Raw_Shift_L = L_mean - L_mean_bg[[1]],
-      zscore_k = delta_k / K_sd,
+      Raw_Shift_K = K_mean - K_mean_bg[[1]],
-      zscore_r = delta_r / r_sd,
+      Raw_Shift_r = r_mean - r_mean_bg[[1]],
-      zscore_auc = delta_auc / AUC_sd,
+      Raw_Shift_AUC = AUC_mean - AUC_mean_bg[[1]],
-      sum_z_score_l = sum(zscore_l, na.rm = TRUE),
+      Z_Shift_L = Raw_Shift_L / L_sd[[0]],
-      avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
+      Z_Shift_K = Raw_Shift_K / K_sd[[0]],
-      sum_z_score_k = sum(zscore_k, na.rm = TRUE),
+      Z_Shift_r = Raw_Shift_r / r_sd[[0]],
-      avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
+      Z_Shift_AUC = Raw_Shift_AUC / AUC_sd[[0]],
-      sum_z_score_r = sum(zscore_r, na.rm = TRUE),
+      WT_l = df$L_mean,
-      avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
+      WT_K = df$K_mean,
-      sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
+      WT_r = df$r_mean,
-      avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
+      WT_AUC = df$AUC_mean,
      WT_sd_l = L_sd_bg[[1]],
      WT_sd_K = K_sd_bg[[1]],
      WT_sd_r = r_sd_bg[[1]],
      WT_sd_AUC = AUC_sd_bg[[1]],
      Exp_L = L_mean_bg[[1]] + Raw_Shift_L,
      Exp_K = K_mean_bg[[1]] + Raw_Shift_K,
      Exp_r = r_mean_bg[[1]] + Raw_Shift_r,
      Exp_AUC = AUC_mean_bg[[1]] + Raw_Shift_AUC,
      Delta_L = ifelse(NG == 1, mean(L) - WT_l, Delta_L),
      Delta_L = ifelse(SM == 1, mean(L) - WT_l, Delta_L),
      Delta_K = ifelse(NG == 1, mean(K) - WT_K, Delta_K),
      Delta_r = ifelse(NG == 1, mean(r) - WT_r, Delta_r),
      Delta_AUC = ifelse(NG == 1, mean(AUC) - WT_AUC, Delta_AUC),
      Zscore_L = Delta_L / WT_sd_l,
      Zscore_K = Delta_K / WT_sd_K,
      Zscore_r = Delta_r / WT_sd_r,
      Zscore_AUC = Delta_AUC / WT_sd_AUC,
    ) %>%
    ungroup()
@@ -355,10 +281,10 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
  interaction_scores <- interaction_scores_all %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
-      lm_l = list(lm(delta_l ~ conc_num_factor)),
+      lm_l = list(lm(Delta_L ~ conc_num_factor)),
-      lm_k = list(lm(delta_k ~ conc_num_factor)),
+      lm_k = list(lm(Delta_K ~ conc_num_factor)),
-      lm_r = list(lm(delta_r ~ conc_num_factor)),
+      lm_r = list(lm(Delta_r ~ conc_num_factor)),
-      lm_auc = list(lm(delta_auc ~ conc_num_factor)),
+      lm_auc = list(lm(Delta_AUC ~ conc_num_factor)),
      lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1],
      lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1],
      lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1],
@@ -367,66 +293,192 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
      R_Squared_K = summary(lm_k[[1]])$r.squared,
      R_Squared_r = summary(lm_r[[1]])$r.squared,
      R_Squared_AUC = summary(lm_auc[[1]])$r.squared,
-      Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE),
+      Sum_Z_Score_L = sum(Zscore_L),
-      Avg_Zscore_L = avg_zscore_l[1],
+      Avg_Zscore_L = Sum_Z_Score_L / (length(variables) - sum(NG)),
-      Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE),
+      Sum_Z_Score_K = sum(Zscore_K),
-      Avg_Zscore_K = avg_zscore_k[1],
+      Avg_Zscore_K = Sum_Z_Score_K / (length(variables) - sum(NG)),
-      Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE),
+      Sum_Z_Score_r = sum(Zscore_r),
-      Avg_Zscore_r = avg_zscore_r[1],
+      Avg_Zscore_r = Sum_Z_Score_r / (length(variables) - sum(NG)),
-      Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE),
+      Sum_Z_Score_AUC = sum(Zscore_AUC),
-      Avg_Zscore_AUC = avg_zscore_auc[1],
+      Avg_Zscore_AUC = Sum_Z_Score_AUC / (length(variables) - sum(NG)),
-      NG = sum(NG, na.rm = TRUE),
+      NG = sum(NG),
-      DB = sum(DB, na.rm = TRUE),
+      DB = sum(DB),
-      SM = sum(SM, na.rm = TRUE)
+      SM = sum(SM)
    ) %>%
    ungroup()
  # Ensure interaction_scores has only the specified columns
  interaction_scores <- interaction_scores %>%
    select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
           raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k,
           raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r,
           raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc,
           NG, DB, SM)
  return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
 }
 generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file_prefix = "RF") {
  variables <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
  WT_sds <- list(WT_sd_l = 2, WT_sd_K = 2, WT_sd_r = 0.65, WT_sd_AUC = 6500)
-  plot_list <- lapply(seq_along(variables), function(i) {
+# generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file_prefix = "RF") {
-    var <- variables[i]
+#   variables <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
-    WT_sd <- WT_sds[[i]]
+#   WT_sds <- list(WT_sd_l = 2, WT_sd_K = 2, WT_sd_r = 0.65, WT_sd_AUC = 6500)
-    ggplot(df_calculations, aes(conc_num_factor, !!sym(var))) +
+#   plot_list <- lapply(seq_along(variables), function(i) {
-      geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
+#     var <- variables[i]
-      coord_cartesian(ylim = c(-WT_sd, WT_sd)) +
+#     WT_sd <- WT_sds[[i]]
      geom_errorbar(aes(ymin = 0 - (2 * WT_sd), ymax = 0 + (2 * WT_sd)), alpha = 0.3) +
      ggtitle(paste(df_calculations$OrfRep[1], df_calculations$Gene[1], sep = " ")) +
      annotate("text", x = 1, y = 0.9 * WT_sd, label = paste("ZShift =", round(df_interactions[[paste0("Z_Shift_", var)]], 2))) +
      annotate("text", x = 1, y = 0.7 * WT_sd, label = paste("lm Zscore =", round(df_interactions[[paste0("Z_lm_", var)]], 2))) +
      annotate("text", x = 1, y = -0.7 * WT_sd, label = paste("NG =", df_interactions$NG)) +
      annotate("text", x = 1, y = -0.9 * WT_sd, label = paste("DB =", df_interactions$DB)) +
      annotate("text", x = 1, y = -1.1 * WT_sd, label = paste("SM =", df_interactions$SM)) +
      scale_x_continuous(
        name = unique(df_calculations$Drug[1]),
        breaks = unique(df_calculations$conc_num_factor),
        labels = unique(as.character(df_calculations$conc_num))) +
      theme_publication()
  })
-  save_plots(file_prefix, plot_list, output_dir)
+#     ggplot(df_calculations, aes(conc_num_factor, !!sym(var))) +
 #       geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-WT_sd, WT_sd)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd), ymax = 0 + (2 * WT_sd)), alpha = 0.3) +
 #       ggtitle(paste(df_calculations$OrfRep[1], df_calculations$Gene[1], sep = " ")) +
 #       annotate("text", x = 1, y = 0.9 * WT_sd, label = paste("ZShift =", round(df_interactions[[paste0("Z_Shift_", var)]], 2))) +
 #       annotate("text", x = 1, y = 0.7 * WT_sd, label = paste("lm Zscore =", round(df_interactions[[paste0("Z_lm_", var)]], 2))) +
 #       annotate("text", x = 1, y = -0.7 * WT_sd, label = paste("NG =", df_interactions$NG)) +
 #       annotate("text", x = 1, y = -0.9 * WT_sd, label = paste("DB =", df_interactions$DB)) +
 #       annotate("text", x = 1, y = -1.1 * WT_sd, label = paste("SM =", df_interactions$SM)) +
 #       scale_x_continuous(
 #         name = unique(df_calculations$Drug[1]),
 #         breaks = unique(df_calculations$conc_num_factor),
 #         labels = unique(as.character(df_calculations$conc_num))) +
 #       theme_publication()
 #   })
 #   save_plots(file_prefix, plot_list, output_dir)
 # }
 # generate_summary_plots <- function(df, output_dir) {
 #   variables <- c("L", "K", "r", "AUC")
 #   plot_list <- lapply(variables, function(var) {
 #     generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var))
 #   })
 #   save_plots("Summary_Plots", plot_list, output_dir)
 # }
 # # Generate ranked plots for a specific metric
 # generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
 #   ggplot(df, aes(x = {{rank_var}}, y = {{zscore_var}})) +
 #     ggtitle(paste(title_prefix, "above", sd_threshold, "SD")) +
 #     xlab("Rank") + ylab(paste("Avg Z score", title_prefix)) +
 #     annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf, fill = "#542788", alpha = 0.3) +
 #     annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf, fill = "orange", alpha = 0.3) +
 #     geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
 #     geom_point(size = 0.1, shape = 3) +
 #     theme_publication()
 # }
 # # Generate and save all ranked plots
 # generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
 #   rank_metrics <- list(
 #     list("L_Rank", "Avg_Zscore_L", "L"),
 #     list("K_Rank", "Avg_Zscore_K", "K"),
 #     list("r_Rank", "Avg_Zscore_r", "r"),
 #     list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
 #     list("L_Rank_lm", "Z_lm_L", "L"),
 #     list("K_Rank_lm", "Z_lm_K", "K"),
 #     list("r_Rank_lm", "Z_lm_r", "r"),
 #     list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
 #   )
 #   pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
 #   for (sd_threshold in c(1, 2, 3)) {
 #     for (metric in rank_metrics) {
 #       plot <- generate_ranked_plot(df, sym(metric[[1]]), sym(metric[[2]]), sd_threshold, metric[[3]])
 #       print(plot)
 #     }
 #   }
 #   dev.off()
 # }
 generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num",
  title, x_label = NULL, y_label = NULL, ylim_vals = NULL) {
  plot <- ggplot(df, aes_string(x = x_var, color = color_var))
  if (!is.null(y_var)) {
    plot <- plot + aes_string(y = y_var)
  }
  # Set up the plot based on the requested plot type
  plot <- switch(plot_type,
    "scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE),
    "box" = plot + geom_boxplot(),
    "density" = plot + geom_density(),
    "bar" = plot + geom_bar(stat = "identity"),
    plot  # Default: return the plot as is
  )
  if (!is.null(ylim_vals)) {
    plot <- plot + coord_cartesian(ylim = ylim_vals)
  }
  # Add titles and labels if available
  plot <- plot + ggtitle(title) + theme_publication()
  if (!is.null(x_label)) plot <- plot + xlab(x_label)
  if (!is.null(y_label)) plot <- plot + ylab(y_label)
  return(plot)
 }
 generate_summary_plots <- function(df, output_dir) {
  variables <- c("L", "K", "r", "AUC")
  plot_list <- lapply(variables, function(var) {
    generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var))
  })
-  save_plots("Summary_Plots", plot_list, output_dir)
+generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot_type = "scatter", include_qc = FALSE, ylim_vals = NULL) {
  plots <- list()
  if (nrow(df) == 0) {
    message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
    return()
  }
  message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
  # Create plots for the given variables
  for (var in variables) {
    plot <- generate_plot(
      df = df,
      x_var = "scan",
      y_var = var,
      plot_type = plot_type,
      title = paste(file_prefix, "Plot for", var),
      ylim_vals = ylim_vals
    )
    plots[[paste0(var, "_", plot_type)]] <- plot
  }
  # Include additional QC plots if requested
  if (include_qc) {
    plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", "Raw L vs K before QC")
    plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", "Density plot for Delta Background")
    plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", "Bar plot for Delta Background")
  }
  save_plots(file_prefix, plots, output_dir)
 }
 # Ensure all plots are saved and printed to PDF
 save_plots <- function(file_name, plot_list, output_dir) {
  # Save to PDF
  pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
  lapply(plot_list, function(plot) {
    print(plot)
  })
  dev.off()
  # Save to HTML with horizontal legend orientation
  lapply(names(plot_list), function(plot_name) {
    message("Generating plot: ", plot_name)
    pgg <- tryCatch({
      suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
        layout(legend = list(orientation = "h")))
    }, error = function(e) {
      message("Error in plot: ", plot_name, "\n", e)
      return(NULL)
    })
    if (!is.null(pgg)) {
      saveWidget(pgg,
        file = file.path(output_dir,
        paste0(file_name, "_", plot_name, ".html")),
        selfcontained = TRUE)
    }
  })
 }
@@ -472,42 +524,6 @@ adjust_missing_and_rank <- function(df) {
  return(df)
 }
 # Generate ranked plots for a specific metric
 generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
  ggplot(df, aes(x = {{rank_var}}, y = {{zscore_var}})) +
    ggtitle(paste(title_prefix, "above", sd_threshold, "SD")) +
    xlab("Rank") + ylab(paste("Avg Z score", title_prefix)) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf, fill = "#542788", alpha = 0.3) +
    annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf, fill = "orange", alpha = 0.3) +
    geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
    geom_point(size = 0.1, shape = 3) +
    theme_publication()
 }
 # Generate and save all ranked plots
 generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
  rank_metrics <- list(
    list("L_Rank", "Avg_Zscore_L", "L"),
    list("K_Rank", "Avg_Zscore_K", "K"),
    list("r_Rank", "Avg_Zscore_r", "r"),
    list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
    list("L_Rank_lm", "Z_lm_L", "L"),
    list("K_Rank_lm", "Z_lm_K", "K"),
    list("r_Rank_lm", "Z_lm_r", "r"),
    list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
  )
  pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
  for (sd_threshold in c(1, 2, 3)) {
    for (metric in rank_metrics) {
      plot <- generate_ranked_plot(df, sym(metric[[1]]), sym(metric[[2]]), sd_threshold, metric[[3]])
      print(plot)
    }
  }
  dev.off()
 }
 # Function to create and save all ranked plots
 create_ranked_plots <- function(df, output_dir) {
@@ -520,8 +536,45 @@ create_ranked_plots <- function(df, output_dir) {
  generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm_naRM")
 }
 generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
  x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
  plot_list <- list()
  # Iterate over each y variable and generate the corresponding plot
  for (var in y_vars) {
    plot <- ggplot(df, aes(x = !!sym(x_var), y = !!sym(var), color = as.factor(!!sym(color_var)))) +
      ggtitle(paste(title_prefix, var)) +
      theme_publication()
    if (plot_type == "scatter") {
      plot <- plot +
        geom_point() +
        geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
      if (!is.null(ylim_vals)) {
        plot <- plot + coord_cartesian(ylim = ylim_vals)
      }
    } else if (plot_type == "box") {
      plot <- plot + geom_boxplot()
    } else if (plot_type == "density") {
      plot <- plot + geom_density()
    } else if (plot_type == "bar") {
      plot <- plot + geom_bar(stat = "identity")
    }
    if (!is.null(x_label)) plot <- plot + xlab(x_label)
    if (!is.null(y_label)) plot <- plot + ylab(y_label)
    plot_list[[var]] <- plot
  }
  # Save plots to PDF and HTML
  save_plots(file_prefix, plot_list, output_dir)
 }
 main <- function() {
  # Applying to all experiments
  lapply(names(args$experiments), function(exp_name) {
    exp <- args$experiments[[exp_name]]
    exp_path <- exp$path
@@ -537,34 +590,28 @@ main <- function() {
    max_conc <- max(df$conc_num_factor)
-    # QC
+    # QC steps and filtering
    # Filter the df above sd tolerance for raw L vs K plot
    df_above_tolerance <- df %>% filter(DB == 1)
    # Set vars above the delta background tolerance to NA
    df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .x)))
    # Remove rows where L is 0
    df_no_zeros <- df_na %>% filter(L > 0)
    # Flag and remove non-finite data, printing affected rows
    df_na_filtered <- df_na %>%
-      filter(if_any(c(L), ~ !is.finite(.))) %>% # Add L, r, AUC, K as needed for debugging
+      filter(if_any(c(L), ~ !is.finite(.))) %>%
      {
        if (nrow(.) > 0) {
          message("Removing non-finite rows:\n")
          print(head(., n = 10))
        }
-        df_na %>% filter(if_all(c(L), is.finite)) # Add L, r, AUC, K as needed for debugging
+        df_na %>% filter(if_all(c(L), is.finite))
      }
-    # Generate QC PDFs and HTMLs
+    # Generate and save QC plots using the new generalized function
-    # message("Generating QC plots")
+    message("Generating QC plots")
-    # variables <- c("L", "K", "r", "AUC", "delta_bg")
+    variables <- c("L", "K", "r", "AUC", "delta_bg")
-    # generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
+    generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
-    # generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
+    generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
-    # generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
+    generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
-    # generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
+    generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
    rm(df, df_above_tolerance, df_no_zeros)
@@ -577,37 +624,35 @@ main <- function() {
    # Originally this filtered L NA's
-    # Filter data within 2SD
+    # Filter data within and outside 2SD
    within_2sd_k <- stats_joined %>%
      filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd))
    # Filter data outside 2SD
    outside_2sd_k <- stats_joined %>%
      filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd))
-    message("Calculating summary statistics for L within 2SD of K")
+    # Summary statistics for within and outside 2SD of K
    l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
    l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
    # Write CSV files
    write.csv(l_within_2sd_k, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
    write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
    message("Calculating summary statistics for L within 2SD of K")
    cols_to_remove <- names(l_within_2sd_k)
    cols_to_keep <- c("conc_num", "conc_num_factor")
    within_2sd_k_clean <- within_2sd_k %>%
      select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
    l_within_2sd_k_joined <- within_2sd_k_clean %>%
      left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor"))
    write.csv(l_within_2sd_k,
      file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
      row.names = FALSE)
    message("Calculating summary statistics for for L outside 2SD of K")
    l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
    cols_to_remove <- names(l_outside_2sd_k)
    cols_to_keep <- c("conc_num", "conc_num_factor")
    outside_2sd_k_clean <- outside_2sd_k %>%
      select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
    l_outside_2sd_k_joined <- outside_2sd_k_clean %>%
      left_join(l_outside_2sd_k, by = c("conc_num", "conc_num_factor"))
    write.csv(l_outside_2sd_k,
      file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
      row.names = FALSE)
    # Process background strains
    bg_strains <- c("YDL227C")
@@ -646,18 +691,14 @@ main <- function() {
        filter(OrfRep != strain) %>%
        mutate(SM = 0)
      message("Processing reference strain")
      reference_strain <- process_strains(l_within_2sd_k_joined)
      message("Processing deletion strains")
      deletion_strains <- process_strains(l_within_2sd_k_joined)
      # TODO we may need to add "num" to grouping vars
      # Calculate interactions
      variables <- c("L", "K", "r", "AUC")
      message("Calculating reference interaction scores")
      reference_results <- calculate_interaction_scores(stats_joined, reference_strain, max_conc, variables)
      message("Calculating deletion interaction scores")
      deletion_results <- calculate_interaction_scores(stats_joined, deletion_strains, max_conc, variables)
      zscores_calculations_reference <- reference_results$zscores_calculations
@@ -666,11 +707,11 @@ main <- function() {
      zscores_interactions <- deletion_results$zscores_interactions
      # TODO: I don't know if we need this?
-      zscores_interactions <- zscores_interactions %>%
+      # zscores_interactions <- zscores_interactions %>%
-        arrange(desc(Z_lm_L)) %>%
+      #   arrange(desc(Z_lm_L)) %>%
-        arrange(desc(NG))
+      #   arrange(desc(NG))
-      message("Writing zscores csv files")
+      # Writing Z-Scores to file
      write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
      write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
      write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
@@ -678,10 +719,11 @@ main <- function() {
      # Define conditions for enhancers and suppressors
      # TODO Add to study config file?
-      enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
+      threshold <- 2
-      suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
+      enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
-      enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
+      suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold
-      suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
+      enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= threshold
      suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -threshold
      # Subset data
      enhancers_L <- zscores_interactions[enhancer_condition_L, ]
@@ -707,10 +749,11 @@ main <- function() {
        file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
      # Handle linear model based enhancers and suppressors
-      enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
+      lm_threshold <- 2
-      suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
+      enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= lm_threshold, ]
-      enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
+      suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -lm_threshold, ]
-      suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
+      enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= lm_threshold, ]
      suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -lm_threshold, ]
      # Save linear model based enhancers and suppressors
      message("Writing linear model enhancer/suppressor csv files")
@@ -724,8 +767,10 @@ main <- function() {
        file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
      # Generate plots for interaction scores
-      generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
+      # generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
-      generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
+      # generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
      generate_and_save_plots(zscores_calculations_reference, out_dir, "Reference_Calculations", variables)
      generate_and_save_plots(zscores_calculations, out_dir, "Deletion_Calculations", variables)
      # Apply filtering to remove NA values before further analysis
      zscores_interactions_filtered <- zscores_interactions %>%
--- a/workflow/qhtcp-workflow
+++ b/workflow/qhtcp-workflow
@@ -785,7 +785,7 @@ init_project() {
  debug "Running: ${FUNCNAME[0]}"
  # Create a project scans dir
-  [[ -d $PROJECT_SCANS_DIR ]] || (execute mkdir -p "$PROJECT_SCANS_DIR" || return 1)
+  [[ -d $PROJECT_SCANS_DIR ]] || { execute mkdir -p "$PROJECT_SCANS_DIR" || return 1; }
  # TODO eventually copy scans from robot but for now let's pause and wait for transfer
  echo "In the future we will copy scans from robot here"
@@ -1561,7 +1561,7 @@ join_interaction_zscores() {
    "${EXP_PATHS_AND_NAMES[@]}"
  for f in "${out_files[@]}"; do
-    [[ -f $f ]] || (echo "$f does not exist"; return 1)
+    [[ -f $f ]] || { echo "$f does not exist"; return 1; }
  done
 }
@@ -1606,7 +1606,7 @@ r_gta() {
    "${5:-"$GTA_OUT_DIR"}" \
    "${@:6}" # future arguments
-  [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
+  [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
 }
@@ -1779,7 +1779,7 @@ r_add_shift_values() {
  rm -f "$STUDY_RESULTS_DIR/REMcHeatmaps/"*.pdf # TODO why?
  out_file="${4:-"$STUDY_RESULTS_DIR/REMcWithShift.csv"}"
-  [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
+  [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
 }
@@ -1815,7 +1815,7 @@ r_create_heat_maps() {
  pdfs=(REMcHeatmaps/*.pdf)
  execute pdftk "${pdfs[@]}" output "$out_file"
  out_file="$2/compiledREMcHeatmaps.pdf"
-  [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
+  [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
 }
@@ -1845,7 +1845,7 @@ r_heat_maps_homology() {
  pdfs=("${1:-"$STUDY_RESULTS_DIR/homology"}"/*.pdf)
  execute pdftk "${pdfs[@]}" output "$out_file"
-  [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
+  [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
 }
@@ -1869,7 +1869,7 @@ py_gtf_dcon() {
    "$2/" \
    "${@:3}" # future arguments
  out_file="$2/1-0-0-finaltable.csv"
-  [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
+  [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
 }
@@ -1930,7 +1930,7 @@ py_gtf_concat() {
  debug "Running: ${FUNCNAME[0]} $*"
  script="$APPS_DIR/python/concatGTFResults.py"
  execute "$PYTHON" "$script" "$1/" "$2"
-  [[ -f $2 ]] || (echo "$2 does not exist"; return 1)
+  [[ -f $2 ]] || { echo "$2 does not exist"; return 1; }
 }
@@ -1983,7 +1983,7 @@ handle_results_dir() {
      results_dir="${results_dirs[0]}"
    else
      # Print results dirs
-      [[ ${#results_dirs[@]} -gt 0 ]] || (err "No ${2}s found"; return 1)
+      [[ ${#results_dirs[@]} -gt 0 ]] || { err "No ${2}s found"; return 1; }
      print_in_columns "results_dirs"
      # Results selection prompt
      cat <<-EOF
@@ -2091,7 +2091,7 @@ handle_config() {
  # Declare a nameref to refer to the actual array using the variable name
  declare -n config_array_ref="$config_array_name"
-  [[ -z ${!config_array_ref[*]} ]] && (err "No $config_array_name array found in $config_file" && return 1)
+  [[ -z ${!config_array_ref[*]} ]] && { err "No $config_array_name array found in $config_file" && return 1; }
  for key in "${!config_array_ref[@]}"; do
    IFS=',' read -r main_key sub_key <<< "$key"
@@ -2298,7 +2298,7 @@ main() {
    if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
      echo "Module ${MODULES[$i]} is not available, removing"
      read -r -p "Enter replacement module name: " module
-      ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${module}[[:space:]] ]] || (echo "RTFM"; return 1)
+      ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${module}[[:space:]] ]] || { echo "RTFM"; return 1; }
      MODULES[i]="$module"
    fi
    unset module