Browse Source

Refactor rank plots

Bryan Roessler 8 months ago
parent
commit
e00df4be45
1 changed files with 111 additions and 81 deletions
  1. 111 81
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 111 - 81
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -567,37 +567,6 @@ generate_box_plot <- function(plot, config) {
   return(plot)
   return(plot)
 }
 }
 
 
-# Adjust missing values and calculate ranks
-adjust_missing_and_rank <- function(df, variables) {
-  
-  # Loop over each variable
-  for (var in variables) {
-    # Construct column names
-    avg_zscore_col <- paste0("Avg_Zscore_", var)
-    z_lm_col <- paste0("Z_lm_", var)
-    rank_col <- paste0("Rank_", var)
-    rank_lm_col <- paste0("Rank_lm_", var)
-    
-    # Check if the columns exist in the data frame
-    if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
-      # Adjust missing values by replacing NA with 0.001
-      df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
-      df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
-      
-      # Compute ranks and create new columns
-      df[[rank_col]] <- rank(df[[avg_zscore_col]])
-      df[[rank_lm_col]] <- rank(df[[z_lm_col]])
-    } else {
-      warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in data frame"))
-    }
-    # Print some debugging output
-    print(df %>% select("OrfRep", "Gene", "conc_num", "conc_num_factor",
-      avg_zscore_col, z_lm_col, rank_col, rank_lm_col))
-  }
-
-  return(df)
-}
-
 generate_interaction_plot_configs <- function(df, variables) {
 generate_interaction_plot_configs <- function(df, variables) {
   configs <- list()
   configs <- list()
   
   
@@ -738,46 +707,90 @@ generate_interaction_plot_configs <- function(df, variables) {
   ))
   ))
 }
 }
 
 
-generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
+generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) {
+  # Adjust missing values and compute ranks for each interaction variable
+  for (var in interaction_vars) {
+    avg_zscore_col <- paste0("Avg_Zscore_", var)
+    z_lm_col <- paste0("Z_lm_", var)
+    rank_col <- paste0("Rank_", var)
+    rank_lm_col <- paste0("Rank_lm_", var)
+    
+    if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
+      # Replace NA with 0.001 for interaction variables
+      df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
+      df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
+      
+      # Compute ranks for interaction variables
+      df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
+      df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
+    } else {
+      warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame"))
+    }
+  }
+  
+  # Initialize list to store plot configurations
   configs <- list()
   configs <- list()
   
   
-  plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
-
-  # Single config with all sd bands
-  configs[[length(configs) + 1]] <- list(
-    df = df,
-    x_var = rank_var,
-    y_var = zscore_var,
-    plot_type = "scatter",
-    title = paste(plot_title_prefix, var, "Rank Plot"),
-    sd_band = c(1, 2, 3),  # Pass all sd bands at once
-    enhancer_label = list(
-      x = nrow(df) / 2, y = 10,
-      label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ]))  # Example for the first SD band
-    ),
-    suppressor_label = list(
-      x = nrow(df) / 2, y = -10,
-      label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ]))
-    ),
-    shape = 3,
-    size = 0.1
-  )
+  # Generate plot configurations for rank variables
+  for (var in rank_vars) {
+    if (is_lm) {
+      rank_var <- paste0("Rank_lm_", var)
+      zscore_var <- paste0("Z_lm_", var)
+      plot_title_prefix <- "Interaction Z score vs. Rank for"
+    } else {
+      rank_var <- paste0("Rank_", var)
+      zscore_var <- paste0("Avg_Zscore_", var)
+      plot_title_prefix <- "Average Z score vs. Rank for"
+    }
 
 
-  # Non-annotated version
-  configs[[length(configs) + 1]] <- list(
-    df = df,
-    x_var = rank_var,
-    y_var = zscore_var,
-    plot_type = "scatter",
-    title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
-    sd_band = c(1, 2, 3),
-    enhancer_label = NULL,
-    suppressor_label = NULL,
-    shape = 3,
-    size = 0.1
-  )
+    enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ])
+    suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ])
 
 
-  return(configs)
+    # Create Annotated Plot Configuration for rank variables
+    config_annotated <- list(
+      df = df,
+      x_var = rank_var,
+      y_var = zscore_var,
+      plot_type = "scatter",
+      title = paste(plot_title_prefix, var, "Rank Plot"),
+      sd_band = c(1, 2, 3),
+      enhancer_label = list(
+        x = nrow(df) / 2, 
+        y = 10,
+        label = paste("Deletion Enhancers =", enhancers_count)
+      ),
+      suppressor_label = list(
+        x = nrow(df) / 2, 
+        y = -10,
+        label = paste("Deletion Suppressors =", suppressors_count)
+      ),
+      shape = 3,
+      size = 0.1
+    )
+
+    # Create Non-Annotated Plot Configuration for rank variables
+    config_no_annotated <- list(
+      df = df,
+      x_var = rank_var,
+      y_var = zscore_var,
+      plot_type = "scatter",
+      title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
+      sd_band = c(1, 2, 3),
+      enhancer_label = NULL,
+      suppressor_label = NULL,
+      shape = 3,
+      size = 0.1
+    )
+
+    # Append configurations to the list
+    configs[[length(configs) + 1]] <- config_annotated
+    configs[[length(configs) + 1]] <- config_no_annotated
+  }
+  
+  return(list(
+    adjusted_df = df,
+    plot_configs = configs
+  ))
 }
 }
 
 
 generate_correlation_plot_configs <- function(df, variables) {
 generate_correlation_plot_configs <- function(df, variables) {
@@ -1214,24 +1227,41 @@ main <- function() {
       write.csv(suppressors_lm_K,
       write.csv(suppressors_lm_K,
         file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
         file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
 
 
-      # TODO needs explanation
-      message("Adjusting and ranking interactions scores")
-      zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions, variables = interaction_vars)
-      
       message("Generating rank plots")
       message("Generating rank plots")
-      rank_plot_configs <- c(
-        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
-        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
+      rank_vars <- c("L", "K")
+
+      # Generate rank plot configurations and adjust the dataframe
+      rank_plot_results <- generate_rank_plot_configs(
+        df = zscores_interactions,
+        interaction_vars = interaction_vars,
+        rank_vars = rank_vars,
+        is_lm = FALSE
+      )
+      zscores_interactions_adjusted <- rank_plot_results$adjusted_df
+
+      # Generate and save standard rank plots
+      generate_and_save_plots(
+        output_dir = out_dir,
+        file_name = "RankPlots",
+        plot_configs = rank_plot_results$plot_configs,
+        grid_layout = list(ncol = 3, nrow = 2)
       )
       )
-      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
-        plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
 
-      rank_lm_plot_config <- c(
-        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
-        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
+      # Generate rank plot configurations for lm variables
+      rank_lm_plot_results <- generate_rank_plot_configs(
+        df = zscores_interactions_adjusted,
+        interaction_vars = interaction_vars,
+        rank_vars = rank_vars,
+        is_lm = TRUE
+      )
+
+      # Generate and save lm rank plots
+      generate_and_save_plots(
+        output_dir = out_dir,
+        file_name = "RankPlots_lm",
+        plot_configs = rank_lm_plot_results$plot_configs,
+        grid_layout = list(ncol = 3, nrow = 2)
       )
       )
-      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
-        plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
       
       
       # Formerly X_NArm
       # Formerly X_NArm
       zscores_interactions_filtered <- zscores_interactions %>%
       zscores_interactions_filtered <- zscores_interactions %>%