Quellcode durchsuchen

Try separating interaction dfs

Bryan Roessler vor 7 Monaten
Ursprung
Commit
4b273db78d
1 geänderte Dateien mit 206 neuen und 200 gelöschten Zeilen
  1. 206 200
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 206 - 200
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -160,10 +160,6 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
       conc_num_factor_factor = as.factor(conc_num)
     )
 
-  # Set the max concentration across the whole dataframe
-  df <- df %>%
-    mutate(max_conc = max(df$conc_num_factor, na.rm = TRUE))
-
   return(df)
 }
 
@@ -215,8 +211,10 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 
-calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overlap_threshold = 2) {
+calculate_interaction_scores <- function(df, df_bg, group_vars) {
 
+  max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
+  
   # Include background statistics per concentration
   bg_stats <- df_bg %>%
     group_by(conc_num, conc_num_factor) %>%
@@ -231,14 +229,14 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       WT_sd_AUC = first(sd_AUC),
       .groups = "drop"
     )
-
+  
   # Calculate total number of concentrations
   total_conc_num <- length(unique(df$conc_num))
-
+  
   # Join background statistics to df
   calculations <- df %>%
     left_join(bg_stats, by = c("conc_num", "conc_num_factor"))
-
+  
   # Perform calculations
   calculations <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
@@ -248,38 +246,38 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       DB = sum(DB, na.rm = TRUE),
       SM = sum(SM, na.rm = TRUE),
       num_non_removed_concs = n_distinct(conc_num[DB != 1]),
-
+      
       # Raw shifts
       Raw_Shift_L = mean_L - WT_L,
       Raw_Shift_K = mean_K - WT_K,
       Raw_Shift_r = mean_r - WT_r,
       Raw_Shift_AUC = mean_AUC - WT_AUC,
-
+      
       # Z shifts
       Z_Shift_L = Raw_Shift_L / WT_sd_L,
       Z_Shift_K = Raw_Shift_K / WT_sd_K,
       Z_Shift_r = Raw_Shift_r / WT_sd_r,
       Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC,
-
+      
       # Expected values
       Exp_L = WT_L + Raw_Shift_L,
       Exp_K = WT_K + Raw_Shift_K,
       Exp_r = WT_r + Raw_Shift_r,
       Exp_AUC = WT_AUC + Raw_Shift_AUC,
-
+      
       # Deltas
       Delta_L = mean_L - Exp_L,
       Delta_K = mean_K - Exp_K,
       Delta_r = mean_r - Exp_r,
       Delta_AUC = mean_AUC - Exp_AUC,
-
+      
       # Adjust Deltas for NG and SM
       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
       Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
       Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
-
+      
       # Z-scores
       Zscore_L = Delta_L / WT_sd_L,
       Zscore_K = Delta_K / WT_sd_K,
@@ -287,7 +285,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     ) %>%
     ungroup()
-
+  
   # Fit linear models within each group
   calculations <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
@@ -321,7 +319,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       return(data)
     }) %>%
     ungroup()
-
+  
   # Compute lm means and sds across all data without grouping
   lm_means_sds <- calculations %>%
     summarise(
@@ -334,7 +332,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
       lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
     )
-
+  
   # Apply global lm means and sds to calculate Z_lm_*
   calculations <- calculations %>%
     mutate(
@@ -343,7 +341,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r,
       Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC
     )
-
+  
   # Build interactions data frame
   interactions <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
@@ -352,75 +350,46 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
       Avg_Zscore_K = mean(Zscore_K, na.rm = TRUE),
       Avg_Zscore_r = mean(Zscore_r, na.rm = TRUE),
       Avg_Zscore_AUC = mean(Zscore_AUC, na.rm = TRUE),
-
+      
       # Interaction Z-scores
       Z_lm_L = first(Z_lm_L),
       Z_lm_K = first(Z_lm_K),
       Z_lm_r = first(Z_lm_r),
       Z_lm_AUC = first(Z_lm_AUC),
-
+      
       # Raw Shifts
       Raw_Shift_L = first(Raw_Shift_L),
       Raw_Shift_K = first(Raw_Shift_K),
       Raw_Shift_r = first(Raw_Shift_r),
       Raw_Shift_AUC = first(Raw_Shift_AUC),
-
+      
       # Z Shifts
       Z_Shift_L = first(Z_Shift_L),
       Z_Shift_K = first(Z_Shift_K),
       Z_Shift_r = first(Z_Shift_r),
       Z_Shift_AUC = first(Z_Shift_AUC),
-
+      
       # NG, DB, SM values
       NG = first(NG),
       DB = first(DB),
       SM = first(SM),
-
+      
       # R Squared values
       R_Squared_L = first(R_Squared_L),
       R_Squared_K = first(R_Squared_K),
       R_Squared_r = first(R_Squared_r),
       R_Squared_AUC = first(R_Squared_AUC),
-
+      
+      # Include Drug
+      Drug = first(Drug),
+      
       .groups = "drop"
     )
-
-  # Create the final calculations and interactions dataframes with required columns
-  calculations_df <- calculations %>%
-    select(
-      all_of(group_vars),
-      conc_num, conc_num_factor, conc_num_factor_factor,
-      N, NG, DB, SM,
-      mean_L, mean_K, mean_r, mean_AUC,
-      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
-      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
-      WT_L, WT_K, WT_r, WT_AUC,
-      WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
-      Exp_L, Exp_K, Exp_r, Exp_AUC,
-      Delta_L, Delta_K, Delta_r, Delta_AUC,
-      Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
-    )
-
-  interactions_df <- interactions %>%
-    select(
-      all_of(group_vars),
-      NG, DB, SM,
-      Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
-      Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
-      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
-      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
-      R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC
-    )
-
-  # Create full_data by joining calculations_df and interactions_df
-  full_data <- calculations_df %>%
-    left_join(interactions_df, by = group_vars, suffix = c("", "_interaction"))
-
-  # Return the dataframes
+  
+  # Return the dataframes without creating full_data
   return(list(
-    calculations = calculations_df,
-    interactions = interactions_df,
-    full_data = full_data
+    calculations = calculations,
+    interactions = interactions
   ))
 }
 
@@ -491,40 +460,29 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
           "red"
         }
 
-        y_mean_prefix <- if (!is.null(config$error_bar_params$y_mean_prefix)) {
-          config$error_bar_params$y_mean_prefix
-        } else {
-          "mean_"
-        }
-        y_mean_col <- paste0(y_mean_prefix, config$y_var)
-
-        # Dynamically set y_sd_col based on the provided prefix in error_bar_params
-        y_sd_prefix <- if (!is.null(config$error_bar_params$y_sd_prefix)) {
-          config$error_bar_params$y_sd_prefix
-        } else {
-          "sd_"
-        }
-        y_sd_col <- paste0(y_sd_prefix, config$y_var)
-
-        if (!is.null(config$error_bar_params$center_point)) {
-          plot <- plot + geom_point(aes(
-            x = .data[[config$x_var]],
-            y = first(.data[[y_mean_col]])),
-            color = error_bar_color,
-            shape = 16)
-        }
-
-        # Use error_bar_params if provided, otherwise calculate from mean and sd
         if (!is.null(config$error_bar_params$ymin) && !is.null(config$error_bar_params$ymax)) {
-          plot <- plot + geom_errorbar(aes(
-            ymin = config$error_bar_params$ymin,
-            ymax = config$error_bar_params$ymax),
-            color = error_bar_color)
+          # Check if ymin and ymax are constants or column names
+          if (is.numeric(config$error_bar_params$ymin) && is.numeric(config$error_bar_params$ymax)) {
+            plot <- plot + geom_errorbar(aes(x = .data[[config$x_var]]),
+              ymin = config$error_bar_params$ymin,
+              ymax = config$error_bar_params$ymax,
+              color = error_bar_color)
+          } else {
+            plot <- plot + geom_errorbar(aes(
+              x = .data[[config$x_var]],
+              ymin = .data[[config$error_bar_params$ymin]],
+              ymax = .data[[config$error_bar_params$ymax]]
+            ), color = error_bar_color)
+          }
         } else {
+          # Original code for calculating from mean and sd
+          y_mean_col <- paste0("mean_", config$y_var)
+          y_sd_col <- paste0("sd_", config$y_var)
           plot <- plot + geom_errorbar(aes(
-            ymin = first(.data[[y_mean_col]]) - first(.data[[y_sd_col]]),
-            ymax = first(.data[[y_mean_col]]) + first(.data[[y_sd_col]])),
-            color = error_bar_color)
+            x = .data[[config$x_var]],
+            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
+            ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
+          ), color = error_bar_color)
         }
       }
 
@@ -756,8 +714,8 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
   return(list(plots = plot_configs))
 }
 
-generate_interaction_plot_configs <- function(df, type) {
-
+generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) {
+  
   # Define the y-limits for the plots
   limits_map <- list(
     L = c(0, 130),
@@ -769,16 +727,16 @@ generate_interaction_plot_configs <- function(df, type) {
   stats_plot_configs <- list()
   stats_boxplot_configs <- list()
   delta_plot_configs <- list()
-
-  # Overall statistics plots
+  
+  # Overall statistics plots (use df)
   OrfRep <- first(df$OrfRep) # this should correspond to the reference strain
-
+  
   for (plot_type in c("scatter", "box")) {
-
+    
     for (var in names(limits_map)) {
       y_limits <- limits_map[[var]]
       y_span <- y_limits[2] - y_limits[1]
-
+      
       # Common plot configuration
       plot_config <- list(
         df = df,
@@ -790,7 +748,7 @@ generate_interaction_plot_configs <- function(df, type) {
         x_breaks = unique(df$conc_num_factor_factor),
         x_labels = as.character(unique(df$conc_num))
       )
-
+      
       # Add specific configurations for scatter and box plots
       if (plot_type == "scatter") {
         plot_config$plot_type <- "scatter"
@@ -803,91 +761,96 @@ generate_interaction_plot_configs <- function(df, type) {
           center_point = TRUE
         )
         plot_config$position <- "jitter"
-
-      annotations <- list(
-        list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
-        list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
-        list(x = 0.25, y = y_limits[1], label = "SM =")
-      )
-
-      # Loop over unique x values and add NG, DB, SM values at calculated y positions
-      for (x_val in unique(df$conc_num_factor_factor)) {
-        current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
-        annotations <- append(annotations, list(
-          list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)),
-          list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)),
-          list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0))
-        ))
-      }
-
-      plot_config$annotations <- annotations
+        
+        annotations <- list(
+          list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
+          list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
+          list(x = 0.25, y = y_limits[1], label = "SM =")
+        )
+        
+        # Loop over unique x values and add NG, DB, SM values at calculated y positions
+        for (x_val in unique(df$conc_num_factor_factor)) {
+          current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
+          annotations <- append(annotations, list(
+            list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = sum(current_df$NG, na.rm = TRUE)),
+            list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = sum(current_df$DB, na.rm = TRUE)),
+            list(x = x_val, y = y_limits[1], label = sum(current_df$SM, na.rm = TRUE))
+          ))
+        }
+        
+        plot_config$annotations <- annotations
         
         # Append to scatter plot configurations
         stats_plot_configs <- append(stats_plot_configs, list(plot_config))
-
+        
       } else if (plot_type == "box") {
         plot_config$plot_type <- "box"
         plot_config$title <- sprintf("%s Boxplot RF for %s with SD", OrfRep, var)
         plot_config$position <- "dodge"  # Boxplots don't need jitter, use dodge instead
-
+        
         # Append to boxplot configurations
         stats_boxplot_configs <- append(stats_boxplot_configs, list(plot_config))
       }
     }
   }
-
-  # Delta interaction plots
+  
+  # Delta interaction plots (use df_calculations and df_interactions)
   if (type == "reference") {
     group_vars <- c("OrfRep", "Gene", "num")
   } else if (type == "deletion") {
     group_vars <- c("OrfRep", "Gene")
   }
-
+  
   delta_limits_map <- list(
     L = c(-60, 60),
     K = c(-60, 60),
     r = c(-0.6, 0.6),
     AUC = c(-6000, 6000)
   )
-
-  grouped_data <- df %>%
+  
+  grouped_data <- df_calculations %>%
     group_by(across(all_of(group_vars))) %>%
     group_split()
-
+  
   for (group_data in grouped_data) {
     OrfRep <- first(group_data$OrfRep)
     Gene <- first(group_data$Gene)
     num <- if ("num" %in% names(group_data)) first(group_data$num) else ""
-
+    
     if (type == "reference") {
         OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_")
     } else if (type == "deletion") {
         OrfRepTitle <- OrfRep
     }
-
+    
+    # Get corresponding interaction row
+    interaction_row <- df_interactions %>%
+      filter(if_all(all_of(group_vars), ~ . == first(.))) %>%
+      slice(1)
+    
     for (var in names(delta_limits_map)) {
       y_limits <- delta_limits_map[[var]]
       y_span <- y_limits[2] - y_limits[1]
-
+      
       # Error bars
       WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
-
-      # Z_Shift and lm values
-      Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2)
-      Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2)
-      R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2)
-
-      # NG, DB, SM values
-      NG_value <- first(group_data$NG, default = 0)
-      DB_value <- first(group_data$DB, default = 0)
-      SM_value <- first(group_data$SM, default = 0)
-
-      # Use the pre-calculated lm intercept and slope from the dataframe
+      
+      # Z_Shift and lm values from interaction_row
+      Z_Shift_value <- round(first(interaction_row[[paste0("Z_Shift_", var)]], default = 0), 2)
+      Z_lm_value <- round(first(interaction_row[[paste0("Z_lm_", var)]], default = 0), 2)
+      R_squared_value <- round(first(interaction_row[[paste0("R_Squared_", var)]], default = 0), 2)
+      
+      # NG, DB, SM values from interaction_row
+      NG_value <- first(interaction_row$NG, default = 0)
+      DB_value <- first(interaction_row$DB, default = 0)
+      SM_value <- first(interaction_row$SM, default = 0)
+      
+      # Use the pre-calculated lm intercept and slope from group_data
       lm_intercept_col <- paste0("lm_intercept_", var)
       lm_slope_col <- paste0("lm_slope_", var)
       lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
       lm_slope_value <- first(group_data[[lm_slope_col]], default = 0)
-
+      
       plot_config <- list(
         df = group_data,
         plot_type = "scatter",
@@ -906,8 +869,9 @@ generate_interaction_plot_configs <- function(df, type) {
         ),
         error_bar = TRUE,
         error_bar_params = list(
-          ymin = 0 - (2 * WT_sd_value),
-          ymax = 0 + (2 * WT_sd_value),
+          # Passing constants directly
+          ymin = -2 * WT_sd_value,
+          ymax = 2 * WT_sd_value,
           color = "black"
         ),
         smooth = TRUE,
@@ -922,43 +886,45 @@ generate_interaction_plot_configs <- function(df, type) {
       delta_plot_configs <- append(delta_plot_configs, list(plot_config))
     }
   }
-
+  
   # Calculate dynamic grid layout
   grid_ncol <- 4
   num_plots <- length(delta_plot_configs)
   grid_nrow <- ceiling(num_plots / grid_ncol)
-
+  
   return(list(
     list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs),
     list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs),
-    list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs)
+    list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = delta_plot_configs)
   ))
 }
 
-generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
-  sd_bands <- c(1, 2, 3)
+generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
+  
   plot_configs <- list()
+  sd_bands <- c(1, 2, 3)
   
-  variables <- c("L", "K")
-
   # Adjust (if necessary) and rank columns
+  variables <- c("L", "K")
   for (variable in variables) {
     if (adjust) {
-      df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]])
-      df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]])
+      df_interactions[[paste0("Avg_Zscore_", variable)]] <-
+        ifelse(is.na(df_interactions[[paste0("Avg_Zscore_", variable)]]), 0.001, df_interactions[[paste0("Avg_Zscore_", variable)]])
+      df_interactions[[paste0("Z_lm_", variable)]] <-
+        ifelse(is.na(df_interactions[[paste0("Z_lm_", variable)]]), 0.001, df_interactions[[paste0("Z_lm_", variable)]])
     }
-    df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
-    df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep")
+    df_interactions[[paste0("Rank_", variable)]] <- rank(df_interactions[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
+    df_interactions[[paste0("Rank_lm_", variable)]] <- rank(df_interactions[[paste0("Z_lm_", variable)]], na.last = "keep")
   }
 
   # Helper function to create a plot configuration
   create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) {
-    num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE)
-    num_suppressors <- sum(df[[zscore_var]] <= -sd_band, na.rm = TRUE)
-    
+    num_enhancers <- sum(df_interactions[[zscore_var]] >= sd_band, na.rm = TRUE)
+    num_suppressors <- sum(df_interactions[[zscore_var]] <= -sd_band, na.rm = TRUE)
+
     # Default plot config
     plot_config <- list(
-      df = df,
+      df = df_interactions,
       x_var = rank_var,
       y_var = zscore_var,
       plot_type = "scatter",
@@ -975,18 +941,18 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
       x_label = "Rank",
       legend_position = "none"
     )
-    
+
     if (with_annotations) {
       # Add specific annotations for plots with annotations
       plot_config$annotations <- list(
         list(
-          x = median(df[[rank_var]], na.rm = TRUE),
-          y = max(df[[zscore_var]], na.rm = TRUE) * 0.9,
+          x = median(df_interactions[[rank_var]], na.rm = TRUE),
+          y = max(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
           label = paste("Deletion Enhancers =", num_enhancers)
         ),
         list(
-          x = median(df[[rank_var]], na.rm = TRUE),
-          y = min(df[[zscore_var]], na.rm = TRUE) * 0.9,
+          x = median(df_interactions[[rank_var]], na.rm = TRUE),
+          y = min(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
           label = paste("Deletion Suppressors =", num_suppressors)
         )
       )
@@ -1000,12 +966,12 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
     rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
     zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable)
     y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
-    
+
     # Loop through SD bands
     for (sd_band in sd_bands) {
       # Create plot with annotations
       plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE)
-      
+
       # Create plot without annotations
       plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = FALSE)
     }
@@ -1019,7 +985,7 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
   return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
 }
 
-generate_correlation_plot_configs <- function(df, correlation_stats) {
+generate_correlation_plot_configs <- function(df_interactions) {
   # Define relationships for different-variable correlations
   relationships <- list(
     list(x = "L", y = "K"),
@@ -1030,6 +996,23 @@ generate_correlation_plot_configs <- function(df, correlation_stats) {
     list(x = "r", y = "AUC")
   )
 
+  correlation_stats <- list()
+
+  for (rel in relationships) {
+    x_var <- paste0("Z_lm_", rel$x)
+    y_var <- paste0("Z_lm_", rel$y)
+    lm_fit <- lm(df_interactions[[y_var]] ~ df_interactions[[x_var]])
+    intercept <- coef(lm_fit)[1]
+    slope <- coef(lm_fit)[2]
+    r_squared <- summary(lm_fit)$r.squared
+    relationship_name <- paste0(rel$x, "_vs_", rel$y)
+    correlation_stats[[relationship_name]] <- list(
+      intercept = intercept,
+      slope = slope,
+      r_squared = r_squared
+    )
+  }
+
   plot_configs <- list()
 
   # Iterate over the option to highlight cyan points (TRUE/FALSE)
@@ -1053,15 +1036,15 @@ generate_correlation_plot_configs <- function(df, correlation_stats) {
 
       # Construct plot config
       plot_config <- list(
-        df = df,
+        df = df_interactions,
         x_var = x_var,
         y_var = y_var,
         plot_type = "scatter",
         title = plot_label,
         annotations = list(
           list(
-            x = mean(df[[x_var]], na.rm = TRUE),
-            y = mean(df[[y_var]], na.rm = TRUE),
+            x = mean(df_interactions[[x_var]], na.rm = TRUE),
+            y = mean(df_interactions[[y_var]], na.rm = TRUE),
             label = paste("R-squared =", round(r_squared, 3))
           )
         ),
@@ -1371,9 +1354,8 @@ main <- function() {
         group_vars = c("OrfRep", "Gene", "num", "conc_num")
         )$df_with_stats
       reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num"))
-      zscore_calculations_reference <- reference_results$calculations
-      zscore_interactions_reference <- reference_results$interactions
-      zscore_interactions_reference_joined <- reference_results$full_data
+      df_calculations_reference <- reference_results$calculations
+      df_interactions_reference <- reference_results$interactions
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2
@@ -1394,38 +1376,39 @@ main <- function() {
         group_vars = c("OrfRep", "Gene", "conc_num")
         )$df_with_stats
       deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene"))
-      zscore_calculations <- deletion_results$calculations
-      zscore_interactions <- deletion_results$interactions
-      zscore_interactions_joined <- deletion_results$full_data
+      df_calculations <- deletion_results$calculations
+      df_interactions <- deletion_results$interactions
 
       # Writing Z-Scores to file
-      write.csv(zscore_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      write.csv(zscore_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
-      write.csv(zscore_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
-      write.csv(zscore_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
+      write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+      write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
+      write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
 
       # Create interaction plots
       message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, "reference")
+      reference_plot_configs <- generate_interaction_plot_configs(
+        df_reference_stats, df_calculations_reference, df_interactions_reference, "reference")
       generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
 
       message("Generating deletion interaction plots")
-      deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, "deletion")
+      deletion_plot_configs <- generate_interaction_plot_configs(
+        df_deletion_stats, df_calculations, df_interactions, "deletion")
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
 
       # Define conditions for enhancers and suppressors
       # TODO Add to study config?
       threshold <- 2
-      enhancer_condition_L <- zscore_interactions$Avg_Zscore_L >= threshold
-      suppressor_condition_L <- zscore_interactions$Avg_Zscore_L <= -threshold
-      enhancer_condition_K <- zscore_interactions$Avg_Zscore_K >= threshold
-      suppressor_condition_K <- zscore_interactions$Avg_Zscore_K <= -threshold
+      enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold
+      suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold
+      enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold
+      suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold
       
       # Subset data
-      enhancers_L <- zscore_interactions[enhancer_condition_L, ]
-      suppressors_L <- zscore_interactions[suppressor_condition_L, ]
-      enhancers_K <- zscore_interactions[enhancer_condition_K, ]
-      suppressors_K <- zscore_interactions[suppressor_condition_K, ]
+      enhancers_L <- df_interactions[enhancer_condition_L, ]
+      suppressors_L <- df_interactions[suppressor_condition_L, ]
+      enhancers_K <- df_interactions[enhancer_condition_K, ]
+      suppressors_K <- df_interactions[suppressor_condition_K, ]
       
       # Save enhancers and suppressors
       message("Writing enhancer/suppressor csv files")
@@ -1435,8 +1418,8 @@ main <- function() {
       write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
       
       # Combine conditions for enhancers and suppressors
-      enhancers_and_suppressors_L <- zscore_interactions[enhancer_condition_L | suppressor_condition_L, ]
-      enhancers_and_suppressors_K <- zscore_interactions[enhancer_condition_K | suppressor_condition_K, ]
+      enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
+      enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
       
       # Save combined enhancers and suppressors
       write.csv(enhancers_and_suppressors_L,
@@ -1446,10 +1429,10 @@ main <- function() {
       
       # Handle linear model based enhancers and suppressors
       lm_threshold <- 2 # TODO add to study config?
-      enhancers_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L >= lm_threshold, ]
-      suppressors_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L <= -lm_threshold, ]
-      enhancers_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K >= lm_threshold, ]
-      suppressors_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K <= -lm_threshold, ]
+      enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ]
+      suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ]
+      enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ]
+      suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ]
       
       # Save linear model based enhancers and suppressors
       message("Writing linear model enhancer/suppressor csv files")
@@ -1464,7 +1447,7 @@ main <- function() {
 
       message("Generating rank plots")
       rank_plot_configs <- generate_rank_plot_configs(
-        df = zscore_interactions_joined,
+        df_interactions,
         is_lm = FALSE,
         adjust = TRUE
       )
@@ -1473,16 +1456,37 @@ main <- function() {
 
       message("Generating ranked linear model plots")
       rank_lm_plot_configs <- generate_rank_plot_configs(
-        df = zscore_interactions_joined,
+        df_interactions,
         is_lm = TRUE,
         adjust = TRUE
       )
       generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
         plot_configs = rank_lm_plot_configs)
 
+      overlap_threshold <- 2
+      df_interactions_filtered <- df_interactions %>%
+        filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
+        mutate(
+          Overlap = case_when(
+            Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
+            Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
+            Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
+            Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
+            Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
+            Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
+            Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
+            Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
+            TRUE ~ "No Effect"
+          ),
+          lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
+          lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
+          lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
+          lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
+        )
+
       message("Generating filtered ranked plots")
       rank_plot_filtered_configs <- generate_rank_plot_configs(
-        df = zscore_interactions_filtered,
+        df_interactions_filtered,
         is_lm = FALSE,
         adjust = FALSE,
         overlap_color = TRUE
@@ -1494,7 +1498,7 @@ main <- function() {
 
       message("Generating filtered ranked linear model plots")
       rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
-        df = zscore_interactions_filtered,
+        df_interactions_filtered,
         is_lm = TRUE,
         adjust = FALSE,
         overlap_color = TRUE
@@ -1505,7 +1509,9 @@ main <- function() {
         plot_configs = rank_plot_lm_filtered_configs)
 
       message("Generating correlation curve parameter pair plots")
-      correlation_plot_configs <- generate_correlation_plot_configs(zscore_interactions_filtered)
+      correlation_plot_configs <- generate_correlation_plot_configs(
+        df_interactions_filtered
+      )
       generate_and_save_plots(
         out_dir = out_dir,
         filename = "correlation_cpps",