Forráskód Böngészése

Create rank plots by SD band

Bryan Roessler 8 hónapja
szülő
commit
534c570335
1 módosított fájl, 62 hozzáadás és 68 törlés
  1. 62 68
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 62 - 68
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -707,7 +707,8 @@ generate_interaction_plot_configs <- function(df, variables) {
   ))
 }
 
-generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) {
+generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE) {
+
   # Adjust missing values and compute ranks for each interaction variable
   for (var in interaction_vars) {
     avg_zscore_col <- paste0("Avg_Zscore_", var)
@@ -731,7 +732,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm =
   # Initialize list to store plot configurations
   configs <- list()
   
-  # Generate plot configurations for rank variables
+  # Generate plot configurations for rank variables (L and K) with sd bands
   for (var in rank_vars) {
     if (is_lm) {
       rank_var <- paste0("Rank_lm_", var)
@@ -743,48 +744,45 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm =
       plot_title_prefix <- "Average Z score vs. Rank for"
     }
 
-    enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ])
-    suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ])
-
-    # Create Annotated Plot Configuration for rank variables
-    config_annotated <- list(
-      df = df,
-      x_var = rank_var,
-      y_var = zscore_var,
-      plot_type = "scatter",
-      title = paste(plot_title_prefix, var, "Rank Plot"),
-      sd_band = c(1, 2, 3),
-      enhancer_label = list(
-        x = nrow(df) / 2, 
-        y = 10,
-        label = paste("Deletion Enhancers =", enhancers_count)
-      ),
-      suppressor_label = list(
-        x = nrow(df) / 2, 
-        y = -10,
-        label = paste("Deletion Suppressors =", suppressors_count)
-      ),
-      shape = 3,
-      size = 0.1
-    )
-
-    # Create Non-Annotated Plot Configuration for rank variables
-    config_no_annotated <- list(
-      df = df,
-      x_var = rank_var,
-      y_var = zscore_var,
-      plot_type = "scatter",
-      title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
-      sd_band = c(1, 2, 3),
-      enhancer_label = NULL,
-      suppressor_label = NULL,
-      shape = 3,
-      size = 0.1
-    )
+    # Create plot configurations for each SD band
+    for (sd_band in c(1, 2, 3)) {
+      # Annotated version
+      configs[[length(configs) + 1]] <- list(
+        df = df,
+        x_var = rank_var,
+        y_var = zscore_var,
+        plot_type = "scatter",
+        title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
+        sd_band = sd_band,
+        enhancer_label = list(
+          x = nrow(df) / 2,
+          y = 10,
+          label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
+        ),
+        suppressor_label = list(
+          x = nrow(df) / 2,
+          y = -10,
+          label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
+        ),
+        shape = 3,
+        size = 0.1
+      )
 
-    # Append configurations to the list
-    configs[[length(configs) + 1]] <- config_annotated
-    configs[[length(configs) + 1]] <- config_no_annotated
+      # Non-annotated version (_notext)
+      configs[[length(configs) + 1]] <- list(
+        df = df,
+        x_var = rank_var,
+        y_var = zscore_var,
+        plot_type = "scatter",
+        title = paste(plot_title_prefix, var, "above", sd_band, "SD No Annotations"),
+        sd_band = sd_band,
+        enhancer_label = NULL,
+        suppressor_label = NULL,
+        shape = 3,
+        size = 0.1,
+        position = "jitter"
+      )
+    }
   }
   
   return(list(
@@ -1228,41 +1226,36 @@ main <- function() {
         file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
 
       message("Generating rank plots")
-      rank_vars <- c("L", "K")
-
       # Generate rank plot configurations and adjust the dataframe
-      rank_plot_results <- generate_rank_plot_configs(
+      zscores_interactions_adjusted <- generate_rank_plot_configs(
         df = zscores_interactions,
         interaction_vars = interaction_vars,
-        rank_vars = rank_vars,
         is_lm = FALSE
-      )
-      zscores_interactions_adjusted <- rank_plot_results$adjusted_df
-
-      # Generate and save standard rank plots
-      generate_and_save_plots(
-        output_dir = out_dir,
-        file_name = "RankPlots",
-        plot_configs = rank_plot_results$plot_configs,
-        grid_layout = list(ncol = 3, nrow = 2)
-      )
+      )$adjusted_df
+
+      # Generate rank plots for L and K using standard ranks
+      rank_plot_configs <- generate_rank_plot_configs(
+        df = zscores_interactions_adjusted,
+        interaction_vars = interaction_vars,
+        is_lm = FALSE
+      )$plot_configs
+
+      # Save the generated rank plots for L and K
+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
+        plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
-      # Generate rank plot configurations for lm variables
-      rank_lm_plot_results <- generate_rank_plot_configs(
+      # Generate rank plots for L and K using linear model (`lm`) ranks
+      rank_lm_plot_configs <- generate_rank_plot_configs(
         df = zscores_interactions_adjusted,
         interaction_vars = interaction_vars,
-        rank_vars = rank_vars,
         is_lm = TRUE
-      )
+      )$plot_configs
 
-      # Generate and save lm rank plots
-      generate_and_save_plots(
-        output_dir = out_dir,
-        file_name = "RankPlots_lm",
-        plot_configs = rank_lm_plot_results$plot_configs,
-        grid_layout = list(ncol = 3, nrow = 2)
-      )
+      # Save the linear model based rank plots for L and K
+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
+        plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
       
+      message("Filtering and regenerating rank plots")
       # Formerly X_NArm
       zscores_interactions_filtered <- zscores_interactions %>%
         group_by(across(all_of(orf_group_vars))) %>%
@@ -1306,6 +1299,7 @@ main <- function() {
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
         plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
+      message("Generating correlation plots")
       correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, interaction_vars)
       generate_and_save_plots(output_dir = out_dir, file_name = "Avg_Zscore_vs_lm_NA_rm",
         plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2))