From 3833f751849e8a7d0aec76de06ea10562eccfbfb Mon Sep 17 00:00:00 2001
From: Bryan Roessler <bryanroessler@gmail.com>
Date: Tue, 10 Sep 2024 03:08:15 -0400
Subject: [PATCH] Add rank plot configs

---
 .../apps/r/calculate_interaction_zscores5.R   | 183 ++++++++++++------
 1 file changed, 119 insertions(+), 64 deletions(-)

diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R
index 004a16cf..556c4c95 100644
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -231,16 +231,17 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
     ungroup()
 
   interaction_scores <- interaction_scores %>%
-    mutate(
-      Raw_Shift_L = mean_L[[1]] - bg_means$L,
-      Raw_Shift_K = mean_K[[1]] - bg_means$K,
-      Raw_Shift_r = mean_r[[1]] - bg_means$r,
-      Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
-      Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
-      Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
-      Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
-      Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
-    )
+    group_by(across(all_of(group_vars))) %>%
+      mutate(
+        Raw_Shift_L = mean_L[[1]] - bg_means$L,
+        Raw_Shift_K = mean_K[[1]] - bg_means$K,
+        Raw_Shift_r = mean_r[[1]] - bg_means$r,
+        Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
+        Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
+        Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
+        Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
+        Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
+      )
 
   interaction_scores <- interaction_scores %>%
     mutate(
@@ -263,28 +264,33 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
     )
 
-  # Calculate Z-scores for each variable
+  # Calculate linear models and interaction scores
   interaction_scores <- interaction_scores %>%
     mutate(
+      lm_L = lm(Delta_L ~ conc_num_factor),
+      lm_K = lm(Delta_K ~ conc_num_factor),
+      lm_r = lm(Delta_r ~ conc_num_factor),
+      lm_AUC = lm(Delta_AUC ~ conc_num_factor),
       Zscore_L = Delta_L / WT_sd_L,
       Zscore_K = Delta_K / WT_sd_K,
       Zscore_r = Delta_r / WT_sd_r,
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     )
 
-  # Calculate linear models
   interaction_scores <- interaction_scores %>%
     mutate(
-      lm_L = lm(Delta_L ~ conc_num_factor),
-      lm_K = lm(Delta_K ~ conc_num_factor),
-      lm_r = lm(Delta_r ~ conc_num_factor),
-      lm_AUC = Dlm(Delta_AUC ~ conc_num_factor)
+      Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
+      Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
+      Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
+      Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
     )
 
-  # Calculate linear models and interaction scores
   interaction_scores_all <- interaction_scores %>%
-    group_by(across(all_of(group_vars))) %>%
     mutate(
+      Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
+      Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
+      Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
+      Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
       lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
       lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
       lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
@@ -301,11 +307,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
       Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
       Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
-      Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE),
-      Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / num_non_removed_concs,
-      Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / num_non_removed_concs,
-      Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
-      Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1)
+      Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
     )
 
   # Arrange results by Z_lm_L and NG
@@ -541,7 +543,6 @@ adjust_missing_and_rank <- function(df, variables) {
   return(df)
 }
 
-
 main <- function() {
   lapply(names(args$experiments), function(exp_name) {
     exp <- args$experiments[[exp_name]]
@@ -625,7 +626,8 @@ main <- function() {
       list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
         title = "Raw L vs K before QC",
         color_var = "conc_num",
-        legend_position = "right")
+        legend_position = "right"
+      )
     )
 
     above_threshold_plots <- list(
@@ -678,15 +680,10 @@ main <- function() {
           error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
           
           # Create the plot configuration
-          plot_config <- list(
-            df = df_plot,
-            x_var = "scan",
-            y_var = var,
-            plot_type = plot_type,
+          plot_config <- list(df = df_plot, x_var = "scan", y_var = var, plot_type = plot_type,
             title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
-            error_bar = error_bar,
-            color_var = "conc_num"
-          )
+            error_bar = error_bar, color_var = "conc_num")
+
           plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
         }
       }
@@ -718,14 +715,16 @@ main <- function() {
       list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter",
         title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
         color_var = "conc_num",
-        legend_position = "right")
+        legend_position = "right"
+      )
     )
 
     delta_bg_outside_2sd_k_plots <- list(
       list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter",
         title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
         color_var = "conc_num",
-        legend_position = "right")
+        legend_position = "right"
+      )
     )
 
     # Generate and save plots for each QC step
@@ -744,7 +743,6 @@ main <- function() {
     # TODO: Originally this filtered L NA's
     # Let's try to avoid for now since stats have already been calculated
 
-
     # Process background strains
     bg_strains <- c("YDL227C")
     lapply(bg_strains, function(strain) {
@@ -847,10 +845,10 @@ main <- function() {
       
       # Save enhancers and suppressors
       message("Writing enhancer/suppressor csv files")
-      write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
-      write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
-      write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
-      write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
+      write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L.csv"), row.names = FALSE)
+      write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L.csv"), row.names = FALSE)
+      write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K.csv"), row.names = FALSE)
+      write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K.csv"), row.names = FALSE)
       
       # Combine conditions for enhancers and suppressors
       enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
@@ -858,9 +856,9 @@ main <- function() {
       
       # Save combined enhancers and suppressors
       write.csv(enhancers_and_suppressors_L,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_L.csv"), row.names = FALSE)
       write.csv(enhancers_and_suppressors_K,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_K.csv"), row.names = FALSE)
       
       # Handle linear model based enhancers and suppressors
       lm_threshold <- 2
@@ -872,37 +870,94 @@ main <- function() {
       # Save linear model based enhancers and suppressors
       message("Writing linear model enhancer/suppressor csv files")
       write.csv(enhancers_lm_L,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L_lm.csv"), row.names = FALSE)
       write.csv(suppressors_lm_L,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L_lm.csv"), row.names = FALSE)
       write.csv(enhancers_lm_K,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K_lm.csv"), row.names = FALSE)
       write.csv(suppressors_lm_K,
-        file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
 
-      # Correlation plots
-      lm_list <- list(
-        lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
-        lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
-        lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
-        lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
-        lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
-        lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
+      zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
+      
+      # Generate ranked plots
+      rank_plot_config <- list(
+        # L Rank plots with different SD thresholds
+        list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
+            title = "Average Z score vs. Rank for L above 1SD", sd_band = 1,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 1, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -1, ])))
+        ),
+        list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
+            title = "Average Z score vs. Rank for L above 2SD", sd_band = 2,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 2, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -2, ])))
+        ),
+        list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
+            title = "Average Z score vs. Rank for L above 3SD", sd_band = 3,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 3, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -3, ])))
+        ),
+        
+        # K Rank plots with different SD thresholds
+        list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
+            title = "Average Z score vs. Rank for K above 1SD", sd_band = 1,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 1, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -1, ])))
+        ),
+        list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
+            title = "Average Z score vs. Rank for K above 2SD", sd_band = 2,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 2, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -2, ])))
+        ),
+        list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
+            title = "Average Z score vs. Rank for K above 3SD", sd_band = 3,
+            enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
+              label = paste("Deletion Enhancers =",
+                nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 3, ]))),
+            suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
+              label = paste("Deletion Suppressors =",
+              nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -3, ])))
+        )
       )
 
+      # Generate and save rank plots
+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
+        plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
+
+      # # Correlation plots
+      # lm_list <- list(
+      #   lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
+      # )
+
       lm_summaries <- lapply(lm_list, summary)
       correlation_plot_configs <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
       generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
-      
-      # Generate ranked plots
-      rank_plot_config <- list(
-        list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
-        list(x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "scatter", title = "Rank vs Avg Z score for K"),
-        list(x_var = "r_Rank", y_var = "Avg_Zscore_r", plot_type = "scatter", title = "Rank vs Avg Z score for r"),
-        list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC")
-      )
-      # Generate and save rank plots using the existing plotting framework
-      generate_and_save_plots(zscores_interactions_filtered, output_dir, rank_plot_config)
     })
   })
 }