浏览代码

Fix interaction groupings

Bryan Roessler 6 月之前
父节点
当前提交
b23c6dafef
共有 1 个文件被更改,包括 219 次插入171 次删除
  1. 219 171
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 219 - 171
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -163,23 +163,19 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
   return(df)
   return(df)
 }
 }
 
 
-# Update Gene names using the SGD gene list
 update_gene_names <- function(df, sgd_gene_list) {
 update_gene_names <- function(df, sgd_gene_list) {
-  # Load SGD gene list
-  genes <- read.delim(file = sgd_gene_list,
-    quote = "", header = FALSE,
+  genes <- read.delim(file = sgd_gene_list, quote = "", header = FALSE,
     colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
     colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
   
   
-  # Create a named vector for mapping ORF to GeneName
-  gene_map <- setNames(genes$V5, genes$V4)
-  # Vectorized match to find the GeneName from gene_map
-  mapped_genes <- gene_map[df$ORF]
-  # Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
-  updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
-  # Ensure Gene is not left blank or incorrectly updated to "OCT1"
+  gene_map <- setNames(genes$V5, genes$V4) # ORF to GeneName mapping
+
   df <- df %>%
   df <- df %>%
-    mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
-  
+    mutate(
+      mapped_genes = gene_map[ORF],
+      Gene = if_else(is.na(mapped_genes) | OrfRep == "YDL227C", Gene, mapped_genes),
+      Gene = if_else(Gene == "" | Gene == "OCT1", OrfRep, Gene) # Handle invalid names
+    )
+
   return(df)
   return(df)
 }
 }
 
 
@@ -203,61 +199,82 @@ calculate_summary_stats <- function(df, variables, group_vars) {
     )
     )
   
   
   # Create a cleaned version of df that doesn't overlap with summary_stats
   # Create a cleaned version of df that doesn't overlap with summary_stats
-  cleaned_df <- df %>%
+  df_cleaned <- df %>%
     select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
     select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
   
   
-  df_joined <- left_join(cleaned_df, summary_stats, by = group_vars)
+  df_joined <- left_join(df_cleaned, summary_stats, by = group_vars)
 
 
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 }
 
 
-calculate_interaction_scores <- function(df, df_bg, group_vars) {
+calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
 
 
   max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
   max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
-  
-  # Include background statistics per concentration
-  bg_stats <- df_bg %>%
-    group_by(conc_num, conc_num_factor) %>%
+  total_conc_num <- length(unique(df$conc_num))
+
+  # Calculate WT statistics from df_bg
+  wt_stats <- df_bg %>%
+    filter(conc_num == 0) %>% # use the zero drug concentration background
     summarise(
     summarise(
-      WT_L = first(mean_L),
-      WT_K = first(mean_K),
-      WT_r = first(mean_r),
-      WT_AUC = first(mean_AUC),
-      WT_sd_L = first(sd_L),
-      WT_sd_K = first(sd_K),
-      WT_sd_r = first(sd_r),
-      WT_sd_AUC = first(sd_AUC),
-      .groups = "drop"
+      WT_L = mean(mean_L, na.rm = TRUE),
+      WT_sd_L = mean(sd_L, na.rm = TRUE),
+      WT_K = mean(mean_K, na.rm = TRUE),
+      WT_sd_K = mean(sd_K, na.rm = TRUE),
+      WT_r = mean(mean_r, na.rm = TRUE),
+      WT_sd_r = mean(sd_r, na.rm = TRUE),
+      WT_AUC = mean(mean_AUC, na.rm = TRUE),
+      WT_sd_AUC = mean(sd_AUC, na.rm = TRUE)
     )
     )
-  
-  # Calculate total number of concentrations
-  total_conc_num <- length(unique(df$conc_num))
-  
-  # Join background statistics to df
-  calculations <- df %>%
-    left_join(bg_stats, by = c("conc_num", "conc_num_factor"))
-  
-  # Perform calculations
-  calculations <- calculations %>%
+
+  # Add WT statistics to df
+  df <- df %>%
+    mutate(
+      WT_L = wt_stats$WT_L,
+      WT_sd_L = wt_stats$WT_sd_L,
+      WT_K = wt_stats$WT_K,
+      WT_sd_K = wt_stats$WT_sd_K,
+      WT_r = wt_stats$WT_r,
+      WT_sd_r = wt_stats$WT_sd_r,
+      WT_AUC = wt_stats$WT_AUC,
+      WT_sd_AUC = wt_stats$WT_sd_AUC
+    )
+
+  # Compute mean values at zero concentration
+  mean_L_zero_df <- df %>%
+    filter(conc_num == 0) %>%
     group_by(across(all_of(group_vars))) %>%
     group_by(across(all_of(group_vars))) %>%
+    summarise(
+      mean_L_zero = mean(mean_L, na.rm = TRUE),
+      mean_K_zero = mean(mean_K, na.rm = TRUE),
+      mean_r_zero = mean(mean_r, na.rm = TRUE),
+      mean_AUC_zero = mean(mean_AUC, na.rm = TRUE),
+      .groups = "drop"
+    )
+
+  # Join mean_L_zero_df to df
+  df <- df %>%
+    left_join(mean_L_zero_df, by = group_vars)
+
+  # Calculate Raw Shifts and Z Shifts
+  df <- df %>%
     mutate(
     mutate(
-      N = n(),
-      NG = sum(NG, na.rm = TRUE),
-      DB = sum(DB, na.rm = TRUE),
-      SM = sum(SM, na.rm = TRUE),
-      num_non_removed_concs = n_distinct(conc_num[DB != 1]),
-      
-      # Raw shifts
-      Raw_Shift_L = mean_L - WT_L,
-      Raw_Shift_K = mean_K - WT_K,
-      Raw_Shift_r = mean_r - WT_r,
-      Raw_Shift_AUC = mean_AUC - WT_AUC,
-      
-      # Z shifts
+      Raw_Shift_L = mean_L_zero - WT_L,
+      Raw_Shift_K = mean_K_zero - WT_K,
+      Raw_Shift_r = mean_r_zero - WT_r,
+      Raw_Shift_AUC = mean_AUC_zero - WT_AUC,
       Z_Shift_L = Raw_Shift_L / WT_sd_L,
       Z_Shift_L = Raw_Shift_L / WT_sd_L,
       Z_Shift_K = Raw_Shift_K / WT_sd_K,
       Z_Shift_K = Raw_Shift_K / WT_sd_K,
       Z_Shift_r = Raw_Shift_r / WT_sd_r,
       Z_Shift_r = Raw_Shift_r / WT_sd_r,
-      Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC,
+      Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC
+    )
+  
+  calculations <- df %>%
+    group_by(across(all_of(group_vars))) %>%
+    mutate(
+      NG_sum = sum(NG, na.rm = TRUE),
+      DB_sum = sum(DB, na.rm = TRUE),
+      SM_sum = sum(SM, na.rm = TRUE),
+      num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
       
       
       # Expected values
       # Expected values
       Exp_L = WT_L + Raw_Shift_L,
       Exp_L = WT_L + Raw_Shift_L,
@@ -270,39 +287,33 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
       Delta_K = mean_K - Exp_K,
       Delta_K = mean_K - Exp_K,
       Delta_r = mean_r - Exp_r,
       Delta_r = mean_r - Exp_r,
       Delta_AUC = mean_AUC - Exp_AUC,
       Delta_AUC = mean_AUC - Exp_AUC,
-      
-      # Adjust Deltas for NG and SM
+
+      # Adjust deltas for NG and SM
       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
       Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
       Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
       Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
       Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
       
       
-      # Z-scores
+      # Calculate Z-scores
       Zscore_L = Delta_L / WT_sd_L,
       Zscore_L = Delta_L / WT_sd_L,
       Zscore_K = Delta_K / WT_sd_K,
       Zscore_K = Delta_K / WT_sd_K,
       Zscore_r = Delta_r / WT_sd_r,
       Zscore_r = Delta_r / WT_sd_r,
       Zscore_AUC = Delta_AUC / WT_sd_AUC
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     ) %>%
     ) %>%
-    ungroup()
-  
-  # Fit linear models within each group
-  calculations <- calculations %>%
-    group_by(across(all_of(group_vars))) %>%
     group_modify(~ {
     group_modify(~ {
-      data <- .x
-      # Fit linear models
-      lm_L <- lm(Delta_L ~ conc_num_factor, data = data)
-      lm_K <- lm(Delta_K ~ conc_num_factor, data = data)
-      lm_r <- lm(Delta_r ~ conc_num_factor, data = data)
-      lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = data)
-      data <- data %>%
+      # Perform linear models
+      lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
+      lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
+      lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
+      lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
+    
+      .x %>%
         mutate(
         mutate(
           lm_intercept_L = coef(lm_L)[1],
           lm_intercept_L = coef(lm_L)[1],
           lm_slope_L = coef(lm_L)[2],
           lm_slope_L = coef(lm_L)[2],
           R_Squared_L = summary(lm_L)$r.squared,
           R_Squared_L = summary(lm_L)$r.squared,
           lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
           lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
-          # Repeat for K, r, and AUC
           lm_intercept_K = coef(lm_K)[1],
           lm_intercept_K = coef(lm_K)[1],
           lm_slope_K = coef(lm_K)[2],
           lm_slope_K = coef(lm_K)[2],
           R_Squared_K = summary(lm_K)$r.squared,
           R_Squared_K = summary(lm_K)$r.squared,
@@ -316,11 +327,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
           R_Squared_AUC = summary(lm_AUC)$r.squared,
           R_Squared_AUC = summary(lm_AUC)$r.squared,
           lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
           lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
         )
         )
-      return(data)
     }) %>%
     }) %>%
     ungroup()
     ungroup()
-  
-  # Compute lm means and sds across all data without grouping
+
+  # Summary statistics for lm scores
   lm_means_sds <- calculations %>%
   lm_means_sds <- calculations %>%
     summarise(
     summarise(
       lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
       lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
@@ -330,26 +340,39 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
       lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
       lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
       lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
       lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
       lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
       lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
-      lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
+      lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
+      .groups = "drop"
     )
     )
-  
-  # Apply global lm means and sds to calculate Z_lm_*
+
   calculations <- calculations %>%
   calculations <- calculations %>%
     mutate(
     mutate(
-      Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L,
-      Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K,
-      Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r,
-      Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC
+      lm_mean_L = lm_means_sds$lm_mean_L,
+      lm_sd_L = lm_means_sds$lm_sd_L,
+      lm_mean_K = lm_means_sds$lm_mean_K,
+      lm_sd_K = lm_means_sds$lm_sd_K,
+      lm_mean_r = lm_means_sds$lm_mean_r,
+      lm_sd_r = lm_means_sds$lm_sd_r,
+      lm_mean_AUC = lm_means_sds$lm_mean_AUC,
+      lm_sd_AUC = lm_means_sds$lm_sd_AUC
     )
     )
   
   
-  # Build interactions data frame
+  # Continue with gene Z-scores and interactions
+  calculations <- calculations %>%
+    mutate(
+      Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
+      Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
+      Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
+      Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
+    )
+
+  # Build summary stats (interactions)
   interactions <- calculations %>%
   interactions <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
     group_by(across(all_of(group_vars))) %>%
     summarise(
     summarise(
-      Avg_Zscore_L = mean(Zscore_L, na.rm = TRUE),
-      Avg_Zscore_K = mean(Zscore_K, na.rm = TRUE),
-      Avg_Zscore_r = mean(Zscore_r, na.rm = TRUE),
-      Avg_Zscore_AUC = mean(Zscore_AUC, na.rm = TRUE),
+      Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
+      Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
+      Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
+      Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1),
       
       
       # Interaction Z-scores
       # Interaction Z-scores
       Z_lm_L = first(Z_lm_L),
       Z_lm_L = first(Z_lm_L),
@@ -368,28 +391,68 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
       Z_Shift_K = first(Z_Shift_K),
       Z_Shift_K = first(Z_Shift_K),
       Z_Shift_r = first(Z_Shift_r),
       Z_Shift_r = first(Z_Shift_r),
       Z_Shift_AUC = first(Z_Shift_AUC),
       Z_Shift_AUC = first(Z_Shift_AUC),
-      
-      # NG, DB, SM values
-      NG = first(NG),
-      DB = first(DB),
-      SM = first(SM),
-      
+
       # R Squared values
       # R Squared values
       R_Squared_L = first(R_Squared_L),
       R_Squared_L = first(R_Squared_L),
       R_Squared_K = first(R_Squared_K),
       R_Squared_K = first(R_Squared_K),
       R_Squared_r = first(R_Squared_r),
       R_Squared_r = first(R_Squared_r),
       R_Squared_AUC = first(R_Squared_AUC),
       R_Squared_AUC = first(R_Squared_AUC),
       
       
-      # Include Drug
-      Drug = first(Drug),
-      
+      # NG, DB, SM values
+      NG = first(NG),
+      DB = first(DB),
+      SM = first(SM),
+
       .groups = "drop"
       .groups = "drop"
     )
     )
-  
-  # Return the dataframes without creating full_data
+
+  # Creating the final calculations and interactions dataframes with only required columns for csv output
+  calculations_df <- calculations %>%
+    select(
+      all_of(group_vars),
+      conc_num, conc_num_factor, conc_num_factor_factor,
+      N, NG, DB, SM,
+      mean_L, median_L, sd_L, se_L,
+      mean_K, median_K, sd_K, se_K,
+      mean_r, median_r, sd_r, se_r,
+      mean_AUC, median_AUC, sd_AUC, se_AUC,
+      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
+      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
+      WT_L, WT_K, WT_r, WT_AUC,
+      WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
+      Exp_L, Exp_K, Exp_r, Exp_AUC,
+      Delta_L, Delta_K, Delta_r, Delta_AUC,
+      Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
+    )
+
+  interactions_df <- interactions %>%
+    select(
+      all_of(group_vars),
+      NG, DB, SM,
+      Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
+      Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
+      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
+      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
+    )
+
+  calculations_no_overlap <- calculations %>%
+    # DB, NG, SM are same as in interactions, the rest may be different and need to be checked
+    select(-any_of(c(
+      "DB", "NG", "SM",
+      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
+      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
+    )))
+
+  # Use left_join to avoid dimension mismatch issues
+  full_data <- calculations_no_overlap %>%
+    left_join(interactions, by = group_vars)
+
+  # Return full_data and the two required dataframes (calculations and interactions)
   return(list(
   return(list(
-    calculations = calculations,
-    interactions = interactions
+    calculations = calculations_df,
+    interactions = interactions_df,
+    full_data = full_data
   ))
   ))
 }
 }
 
 
@@ -475,14 +538,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
             ), color = error_bar_color)
             ), color = error_bar_color)
           }
           }
         } else {
         } else {
-          # Original code for calculating from mean and sd
+          # Ensure the mean and sd columns exist
           y_mean_col <- paste0("mean_", config$y_var)
           y_mean_col <- paste0("mean_", config$y_var)
           y_sd_col <- paste0("sd_", config$y_var)
           y_sd_col <- paste0("sd_", config$y_var)
-          plot <- plot + geom_errorbar(aes(
-            x = .data[[config$x_var]],
-            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
-            ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
-          ), color = error_bar_color)
+          
+          if (y_mean_col %in% colnames(df) && y_sd_col %in% colnames(df)) {
+            plot <- plot + geom_errorbar(aes(
+              x = .data[[config$x_var]],
+              ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
+              ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
+            ), color = error_bar_color)
+          }
         }
         }
       }
       }
 
 
@@ -714,7 +780,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
   return(list(plots = plot_configs))
   return(list(plots = plot_configs))
 }
 }
 
 
-generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) {
+generate_interaction_plot_configs <- function(df, type) {
   
   
   # Define the y-limits for the plots
   # Define the y-limits for the plots
   limits_map <- list(
   limits_map <- list(
@@ -762,13 +828,14 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
         )
         )
         plot_config$position <- "jitter"
         plot_config$position <- "jitter"
         
         
+        # Annotation labels
         annotations <- list(
         annotations <- list(
-          list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
-          list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
-          list(x = 0.25, y = y_limits[1], label = "SM =")
+          list(x = 0, y = y_limits[1] + 0.1 * y_span, label = "NG ="),
+          list(x = 0, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
+          list(x = 0, y = y_limits[1], label = "SM =")
         )
         )
-        
-        # Loop over unique x values and add NG, DB, SM values at calculated y positions
+
+        # Annotation values
         for (x_val in unique(df$conc_num_factor_factor)) {
         for (x_val in unique(df$conc_num_factor_factor)) {
           current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
           current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
           annotations <- append(annotations, list(
           annotations <- append(annotations, list(
@@ -794,7 +861,6 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
     }
     }
   }
   }
   
   
-  # Delta interaction plots (use df_calculations and df_interactions)
   if (type == "reference") {
   if (type == "reference") {
     group_vars <- c("OrfRep", "Gene", "num")
     group_vars <- c("OrfRep", "Gene", "num")
   } else if (type == "deletion") {
   } else if (type == "deletion") {
@@ -1195,14 +1261,14 @@ main <- function() {
     ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
     ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
       group_vars = c("conc_num"))$summary_stats
       group_vars = c("conc_num"))$summary_stats
     write.csv(ss,
     write.csv(ss,
-      file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
+      file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
       row.names = FALSE)
       row.names = FALSE)
     
     
     message("Calculating summary statistics for L outside 2SD of K")
     message("Calculating summary statistics for L outside 2SD of K")
     ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
     ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
     df_na_l_outside_2sd_k_stats <- ss$df_with_stats
     df_na_l_outside_2sd_k_stats <- ss$df_with_stats
     write.csv(ss$summary_stats,
     write.csv(ss$summary_stats,
-      file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
+      file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
       row.names = FALSE)
       row.names = FALSE)
 
 
     plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
     plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
@@ -1300,11 +1366,10 @@ main <- function() {
         plot_configs = plate_analysis_no_zeros_boxplot_configs),
         plot_configs = plate_analysis_no_zeros_boxplot_configs),
       list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
       list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
         plot_configs = l_outside_2sd_k_plot_configs),
         plot_configs = l_outside_2sd_k_plot_configs),
-      list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
+      list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2SD_outside_mean_K",
         plot_configs = delta_bg_outside_2sd_k_plot_configs)
         plot_configs = delta_bg_outside_2sd_k_plot_configs)
     )
     )
 
 
-    # Generating quality control plots in parallel
     # furrr::future_map(plot_configs, function(config) {
     # furrr::future_map(plot_configs, function(config) {
     #   generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
     #   generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
     # }, .options = furrr_options(seed = TRUE))
     # }, .options = furrr_options(seed = TRUE))
@@ -1325,9 +1390,9 @@ main <- function() {
         ) %>%
         ) %>%
         filter(!is.na(L))
         filter(!is.na(L))
       
       
-      message("Calculating summary statistics for background strain")
+      message("Calculating background strain summary statistics")
       ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
       ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
-        group_vars = c("OrfRep", "conc_num"))
+        group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
       summary_stats_bg <- ss_bg$summary_stats
       summary_stats_bg <- ss_bg$summary_stats
       df_bg_stats <- ss_bg$df_with_stats
       df_bg_stats <- ss_bg$df_with_stats
       write.csv(
       write.csv(
@@ -1339,7 +1404,7 @@ main <- function() {
       df_reference <- df_na_stats %>% # formerly X2_RF
       df_reference <- df_na_stats %>% # formerly X2_RF
         filter(OrfRep == strain) %>%
         filter(OrfRep == strain) %>%
         filter(!is.na(L)) %>%
         filter(!is.na(L)) %>%
-        group_by(conc_num) %>%
+        group_by(OrfRep, Drug, conc_num) %>%
         mutate(
         mutate(
           max_l_theoretical = max(max_L, na.rm = TRUE),
           max_l_theoretical = max(max_L, na.rm = TRUE),
           L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
           L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1347,21 +1412,25 @@ main <- function() {
           L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
           L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
         ungroup()
         ungroup()
 
 
-      message("Calculating reference strain interaction scores")
+      message("Calculating reference strain summary statistics")
       df_reference_stats <- calculate_summary_stats(
       df_reference_stats <- calculate_summary_stats(
         df = df_reference,
         df = df_reference,
         variables = c("L", "K", "r", "AUC"),
         variables = c("L", "K", "r", "AUC"),
-        group_vars = c("OrfRep", "Gene", "num", "conc_num")
+        group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor")
         )$df_with_stats
         )$df_with_stats
-      reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num"))
-      df_calculations_reference <- reference_results$calculations
-      df_interactions_reference <- reference_results$interactions
+        message("Calculating reference strain interaction scores")
+      results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num"))
+      df_calculations_reference <- results$calculations
+      df_interactions_reference <- results$interactions
+      df_interactions_reference_joined <- results$full_data
+      write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
 
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2
       df_deletion <- df_na_stats %>% # formerly X2
         filter(OrfRep != strain) %>%
         filter(OrfRep != strain) %>%
         filter(!is.na(L)) %>%
         filter(!is.na(L)) %>%
-        group_by(conc_num) %>%
+        group_by(OrfRep, Gene, conc_num) %>%
         mutate(
         mutate(
           max_l_theoretical = max(max_L, na.rm = TRUE),
           max_l_theoretical = max(max_L, na.rm = TRUE),
           L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
           L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1369,81 +1438,60 @@ main <- function() {
           L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
           L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
         ungroup()
         ungroup()
 
 
-      message("Calculating deletion strain(s) interactions scores")
+      message("Calculating deletion strain(s) summary statistics")
       df_deletion_stats <- calculate_summary_stats(
       df_deletion_stats <- calculate_summary_stats(
         df = df_deletion,
         df = df_deletion,
         variables = c("L", "K", "r", "AUC"),
         variables = c("L", "K", "r", "AUC"),
-        group_vars = c("OrfRep", "Gene", "conc_num")
+        group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
         )$df_with_stats
         )$df_with_stats
-      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene"))
-      df_calculations <- deletion_results$calculations
-      df_interactions <- deletion_results$interactions
-
-      # Writing Z-Scores to file
-      write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+      message("Calculating deletion strain(s) interactions scores")
+      results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
+      df_calculations <- results$calculations
+      df_interactions <- results$interactions
+      df_interactions_joined <- results$full_data
       write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
       write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
 
 
       # Create interaction plots
       # Create interaction plots
       message("Generating reference interaction plots")
       message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(
-        df_reference_stats, df_calculations_reference, df_interactions_reference, "reference")
+      reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, df_bg_stats, "reference")
       generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
       generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
 
 
       message("Generating deletion interaction plots")
       message("Generating deletion interaction plots")
-      deletion_plot_configs <- generate_interaction_plot_configs(
-        df_deletion_stats, df_calculations, df_interactions, "deletion")
+      deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, df_bg_stats, "deletion")
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
 
 
-      # Define conditions for enhancers and suppressors
-      # TODO Add to study config?
-      threshold <- 2
-      enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold
-      suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold
-      enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold
-      suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold
-      
-      # Subset data
+      message("Writing enhancer/suppressor csv files")
+      interaction_threshold <- 2  # TODO add to study config?
+      enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
+      suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
+      enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
+      suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
       enhancers_L <- df_interactions[enhancer_condition_L, ]
       enhancers_L <- df_interactions[enhancer_condition_L, ]
       suppressors_L <- df_interactions[suppressor_condition_L, ]
       suppressors_L <- df_interactions[suppressor_condition_L, ]
       enhancers_K <- df_interactions[enhancer_condition_K, ]
       enhancers_K <- df_interactions[enhancer_condition_K, ]
       suppressors_K <- df_interactions[suppressor_condition_K, ]
       suppressors_K <- df_interactions[suppressor_condition_K, ]
-      
-      # Save enhancers and suppressors
-      message("Writing enhancer/suppressor csv files")
+      enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
+      enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
       write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
       write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
       write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
       write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
       write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
       write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
       write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
       write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
-      
-      # Combine conditions for enhancers and suppressors
-      enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
-      enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
-      
-      # Save combined enhancers and suppressors
       write.csv(enhancers_and_suppressors_L,
       write.csv(enhancers_and_suppressors_L,
         file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
         file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
       write.csv(enhancers_and_suppressors_K,
       write.csv(enhancers_and_suppressors_K,
         file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
         file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
       
       
-      # Handle linear model based enhancers and suppressors
-      lm_threshold <- 2 # TODO add to study config?
-      enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ]
-      suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ]
-      enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ]
-      suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ]
-      
-      # Save linear model based enhancers and suppressors
       message("Writing linear model enhancer/suppressor csv files")
       message("Writing linear model enhancer/suppressor csv files")
-      write.csv(enhancers_lm_L,
-        file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
-      write.csv(suppressors_lm_L,
-        file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
-      write.csv(enhancers_lm_K,
-        file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
-      write.csv(suppressors_lm_K,
-        file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
+      lm_interaction_threshold <- 2 # TODO add to study config?
+      enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
+      suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
+      enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
+      suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
+      write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
+      write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
+      write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
+      write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
 
 
       message("Generating rank plots")
       message("Generating rank plots")
       rank_plot_configs <- generate_rank_plot_configs(
       rank_plot_configs <- generate_rank_plot_configs(
@@ -1463,7 +1511,7 @@ main <- function() {
       generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
       generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
         plot_configs = rank_lm_plot_configs)
         plot_configs = rank_lm_plot_configs)
 
 
-      overlap_threshold <- 2
+      overlap_threshold <- 2 # TODO add to study config?
       df_interactions_filtered <- df_interactions %>%
       df_interactions_filtered <- df_interactions %>%
         filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
         filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
         mutate(
         mutate(