10 ماه پیش · 5518005d03
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -530,20 +530,35 @@ generate_interaction_plot_configs <- function(df, variables) {
 
				   return(plot_configs)
			
 
				 }
			
 
				 
			
 
				-generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix, is_lm = FALSE) {
			
 
				+# Adjust missing values and calculate ranks
			
 
				+adjust_missing_and_rank <- function(df, variables) {
			
 
				+
			
 
				+  # Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
			
 
				+  df <- df %>%
			
 
				+    mutate(across(all_of(variables), list(
			
 
				+      Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
			
 
				+      Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
			
 
				+      Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
			
 
				+      Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
			
 
				+    ), .names = "{fn}_{col}"))
			
 
				+
			
 
				+  return(df)
			
 
				+}
			
 
				+
			
 
				+generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
			
 
				   configs <- list()
			
 
				   
			
 
				   # Adjust titles for _lm plots if is_lm is TRUE
			
 
				   plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
			
 
				   
			
 
				+  # Annotated version (with text)
			
 
				   for (sd_band in c(1, 2, 3)) {
			
 
				-    # Annotated version (with text)
			
 
				     configs[[length(configs) + 1]] <- list(
			
 
				       df = df,
			
 
				       x_var = rank_var,
			
 
				       y_var = zscore_var,
			
 
				       plot_type = "rank",
			
 
				-      title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"),
			
 
				+      title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
			
 
				       sd_band = sd_band,
			
 
				       enhancer_label = list(
			
 
				         x = nrow(df) / 2, y = 10,
			
@@ -554,14 +569,16 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix, i
 
				         label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
			
 
				       )
			
 
				     )
			
 
				-    
			
 
				-    # Non-annotated version (_notext)
			
 
				+  }
			
 
				+  
			
 
				+  # Non-annotated version (_notext)
			
 
				+  for (sd_band in c(1, 2, 3)) {
			
 
				     configs[[length(configs) + 1]] <- list(
			
 
				       df = df,
			
 
				       x_var = rank_var,
			
 
				       y_var = zscore_var,
			
 
				       plot_type = "rank",
			
 
				-      title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"),
			
 
				+      title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
			
 
				       sd_band = sd_band,
			
 
				       enhancer_label = NULL,  # No annotations for _notext
			
 
				       suppressor_label = NULL  # No annotations for _notext
			
@@ -571,32 +588,26 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix, i
 
				   return(configs)
			
 
				 }
			
 
				 
			
 
				-generate_correlation_plot_configs <- function(df, lm_list, lm_summaries) {
			
 
				-  lapply(seq_along(lm_list), function(i) {
			
 
				-    r_squared <- round(lm_summaries[[i]]$r.squared, 3)
			
 
				-    list(
			
 
				-      x_var = names(lm_list)[i][1],
			
 
				-      y_var = names(lm_list)[i][2],
			
 
				-      plot_type = "scatter",
			
 
				-      title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]),
			
 
				-      annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
			
 
				-    )
			
 
				-  })
			
 
				-}
			
 
				+generate_correlation_plot_configs <- function(df, variables) {
			
 
				+  plot_configs <- list()
			
 
				 
			
 
				-# Adjust missing values and calculate ranks
			
 
				-adjust_missing_and_rank <- function(df, variables) {
			
 
				+  for (variable in variables) {
			
 
				+    z_lm_var <- paste0("Z_lm_", variable)
			
 
				+    avg_zscore_var <- paste0("Avg_Zscore_", variable)
			
 
				+    lm_r_squared_col <- paste0("lm_R_squared_", variable)
			
 
				 
			
 
				-  # Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
			
 
				-  df <- df %>%
			
 
				-    mutate(across(all_of(variables), list(
			
 
				-      Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
			
 
				-      Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
			
 
				-      Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
			
 
				-      Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
			
 
				-    ), .names = "{fn}_{col}"))
			
 
				+    plot_configs[[length(plot_configs) + 1]] <- list(
			
 
				+      df = df,
			
 
				+      x_var = avg_zscore_var,
			
 
				+      y_var = z_lm_var,
			
 
				+      plot_type = "correlation",
			
 
				+      title = paste("Avg Zscore vs lm", variable),
			
 
				+      color_var = "Overlap",
			
 
				+      correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2))
			
 
				+    )
			
 
				+  }
			
 
				 
			
 
				-  return(df)
			
 
				+  return(plot_configs)
			
 
				 }
			
 
				 
			
 
				 main <- function() {
			
@@ -674,10 +685,8 @@ main <- function() {
 
				     write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
			
 
				     write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
			
 
				 
			
 
				-    # Plots
			
 
				-
			
 
				-    # Print quality control graphs before removing data due to contamination and
			
 
				-    # adjusting missing data to max theoretical values
			
 
				+    # Plot configurations
			
 
				+    # Each plots list corresponds to a file
			
 
				     message("Generating QC plot configurations")
			
 
				     l_vs_k_plots <- list(
			
 
				       list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
			
@@ -820,9 +829,9 @@ main <- function() {
 
				       
			
 
				       # Recalculate summary statistics for the background strain
			
 
				       message("Calculating summary statistics for background strain")
			
 
				-      ss <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
			
 
				-      summary_stats_bg <- ss$summary_stats
			
 
				-      df_bg_stats <- ss$df_with_stats
			
 
				+      ss_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
			
 
				+      summary_stats_bg <- ss_bg$summary_stats
			
 
				+      # df_bg_stats <- ss_bg$df_with_stats
			
 
				       write.csv(summary_stats_bg,
			
 
				         file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
			
 
				         row.names = FALSE)
			
@@ -935,6 +944,7 @@ main <- function() {
 
				       write.csv(suppressors_lm_K,
			
 
				         file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
			
 
				 
			
 
				+      # TODO needs explanation
			
 
				       zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
			
 
				       
			
 
				       rank_plot_configs <- c(
			
@@ -945,30 +955,56 @@ main <- function() {
 
				         plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
			
 
				 
			
 
				       rank_lm_plot_config <- c(
			
 
				-        generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L"),
			
 
				-        generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K")
			
 
				+        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
			
 
				+        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
			
 
				       )
			
 
				       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
			
 
				         plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
			
 
				       
			
 
				-      # interaction_scores_filtered
			
 
				+      # Formerly X_NArm
			
 
				+      zscores_interactions_filtered <- zscores_interactions %>%
			
 
				+        group_by(across(all_of(group_vars))) %>%
			
 
				+          filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
			
 
				+      
			
 
				+      zscores_interactions_filtered <- zscores_interactions_filtered %>%
			
 
				+        mutate(
			
 
				+          Overlap = case_when(
			
 
				+            Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
			
 
				+            Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both",
			
 
				+            Z_lm_L >= 2 & Avg_Zscore_L <= 2 ~ "Deletion Enhancer lm only",
			
 
				+            Z_lm_L <= -2 & Avg_Zscore_L >= -2 ~ "Deletion Suppressor lm only",
			
 
				+            Z_lm_L >= 2 & Avg_Zscore_L <= -2 ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
			
 
				+            Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
			
 
				+            TRUE ~ "No Effect"
			
 
				+          ),
			
 
				+          lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
			
 
				+          lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
			
 
				+          lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
			
 
				+          lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
			
 
				+        ) %>%
			
 
				+        ungroup()
			
 
				+
			
 
				+      rank_plot_configs <- c(
			
 
				+        generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"),
			
 
				+        generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K")
			
 
				+      )
			
 
				+      
			
 
				+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
			
 
				+        plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
			
 
				+      
			
 
				+      rank_lm_plot_configs <- c(
			
 
				+        generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
			
 
				+        generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
			
 
				+      )
			
 
				+      
			
 
				+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
			
 
				+        plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
			
 
				 
			
 
				+      correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, variables)
			
 
				+      generate_and_save_plots(output_dir = out_dir, file_name = "Avg_Zscore_vs_lm_NA_rm",
			
 
				+        plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2))
			
 
				 
			
 
				-      # lm_summaries <- lapply(lm_list, summary)
			
 
				-      # correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
			
 
				-      # generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
			
 
				     })
			
 
				   })
			
 
				 }
			
 
				 main()
			
 
				-
			
 
				-
			
 
				-      # # Correlation plots
			
 
				-      # lm_list <- list(
			
 
				-      #   lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
			
 
				-      #   lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
			
 
				-      #   lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
			
 
				-      #   lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
			
 
				-      #   lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
			
 
				-      #   lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
			
 
				-      # )