Bläddra i källkod

Break apart rank plot code

Bryan Roessler 7 månader sedan
förälder
incheckning
9fe45ba73f
1 ändrade filer med 127 tillägg och 82 borttagningar
  1. 127 82
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 127 - 82
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -457,7 +457,7 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
 }
 
 generate_scatter_plot <- function(plot, config) {
-
+  
   if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
     plot <- plot + geom_point(
       shape = config$shape %||% 3,
@@ -472,31 +472,32 @@ generate_scatter_plot <- function(plot, config) {
   } else {
     plot <- plot + geom_point(
       shape = config$shape %||% 3,
-      position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity"
+      position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity",
+      size = config$size %||% 0.1
     )
   }
-
+  
   # Add smooth line if specified
   if (!is.null(config$add_smooth) && config$add_smooth) {
-    plot <- if (!is.null(config$lm_line)) {
-      plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope)
+    if (!is.null(config$lm_line)) {
+      plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "blue")
     } else {
-      plot + geom_smooth(method = "lm", se = FALSE)
+      plot <- plot + geom_smooth(method = "lm", se = FALSE, color = "blue")
     }
   }
-
-  # Add SD bands (iterate over sd_band only here)
-  if (!is.null(config$sd_band)) {
-    for (i in config$sd_band) {
+  
+  # Add SD bands if specified
+  if (!is.null(config$sd_band_values)) {
+    for (sd_band in config$sd_band_values) {
       plot <- plot +
-        annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
-        annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
-        geom_hline(yintercept = c(-i, i), color = "gray")
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_band, ymax = Inf, fill = "#542788", alpha = 0.3) +
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_band, ymax = -Inf, fill = "orange", alpha = 0.3) +
+        geom_hline(yintercept = c(-sd_band, sd_band), color = "gray")
     }
   }
-
+  
   # Add error bars if specified
-  if (!is.null(config$error_bar) && config$error_bar) {
+  if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
     y_mean_col <- paste0("mean_", config$y_var)
     y_sd_col <- paste0("sd_", config$y_var)
     plot <- plot + geom_errorbar(
@@ -507,7 +508,7 @@ generate_scatter_plot <- function(plot, config) {
       alpha = 0.3
     )
   }
-
+  
   # Add x-axis customization if specified
   if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
     plot <- plot + scale_x_discrete(
@@ -516,17 +517,17 @@ generate_scatter_plot <- function(plot, config) {
       labels = config$x_labels
     )
   }
-
+  
   # Use coord_cartesian for zooming in without removing data outside the range
   if (!is.null(config$coord_cartesian)) {
     plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
   }
-
+  
   # Use scale_y_continuous for setting the y-axis limits
   if (!is.null(config$ylim_vals)) {
     plot <- plot + scale_y_continuous(limits = config$ylim_vals)
   }
-
+  
   # Add annotations if specified
   if (!is.null(config$annotations)) {
     for (annotation in config$annotations) {
@@ -534,19 +535,20 @@ generate_scatter_plot <- function(plot, config) {
         x = annotation$x,
         y = annotation$y,
         label = annotation$label,
-        na.rm = TRUE)
+        na.rm = TRUE
+      )
     }
   }
-
+  
   # Add titles and themes if specified
   if (!is.null(config$title)) {
     plot <- plot + ggtitle(config$title)
   }
-
+  
   if (!is.null(config$legend_position)) {
     plot <- plot + theme(legend.position = config$legend_position)
   }
-
+  
   return(plot)
 }
 
@@ -671,81 +673,124 @@ generate_interaction_plot_configs <- function(df, variables) {
 }
 
 generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) {
-
+  
   df_filtered <- filter_data(df, variables, missing = TRUE)
-
-  for (var in variables) {
-    avg_zscore_col <- paste0("Avg_Zscore_", var)
-    z_lm_col <- paste0("Z_lm_", var)
-    rank_col <- paste0("Rank_", var)
-    rank_lm_col <- paste0("Rank_lm_", var)
-
-    if (adjust) {
-      # Replace NA with 0.001 for interaction variables
-      df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
-      df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
-    }
-
-    # Compute ranks for interaction variables
-    df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
-    df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
-  }
+  
+  # Define SD bands
+  sd_bands <- c(1, 2, 3)
+  
+  # Define variables for Avg ZScore and Rank Avg ZScore plots
+  avg_zscore_vars <- c("r", "L", "K", "AUC")
   
   # Initialize list to store plot configurations
   configs <- list()
   
-  # Generate plot configurations for rank variables (L and K) with sd bands
+  #### 1. SD-Based Plots for L and K ####
   for (var in c("L", "K")) {
-    if (is_lm) {
-      rank_var <- paste0("Rank_lm_", var)
-      zscore_var <- paste0("Z_lm_", var)
-      plot_title_prefix <- "Interaction Z score vs. Rank for"
-    } else {
-      rank_var <- paste0("Rank_", var)
-      zscore_var <- paste0("Avg_Zscore_", var)
-      plot_title_prefix <- "Average Z score vs. Rank for"
-    }
-
-    # Create plot configurations for each SD band
-    for (sd_band in c(1, 2, 3)) {
-      # Annotated version
+    for (sd_band in sd_bands) {
+      
+      # Determine columns based on whether it's lm or not
+      if (is_lm) {
+        rank_var <- paste0(var, "_Rank_lm")
+        zscore_var <- paste0("Z_lm_", var)
+        y_label <- paste("Int Z score", var)
+      } else {
+        rank_var <- paste0(var, "_Rank")
+        zscore_var <- paste0("Avg_Zscore_", var)
+        y_label <- paste("Avg Z score", var)
+      }
+      
+      # Annotated Plot Configuration
       configs[[length(configs) + 1]] <- list(
-        df = df,
+        df = df_filtered,
         x_var = rank_var,
         y_var = zscore_var,
         plot_type = "scatter",
-        title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
+        title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD"),
         sd_band = sd_band,
-        enhancer_label = list(
-          x = nrow(df) / 2,
-          y = 10,
-          label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
-        ),
-        suppressor_label = list(
-          x = nrow(df) / 2,
-          y = -10,
-          label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
+        annotations = list(
+          list(
+            x = median(df_filtered[[rank_var]], na.rm = TRUE),
+            y = 10,
+            label = paste("Deletion Enhancers =", sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE))
+          ),
+          list(
+            x = median(df_filtered[[rank_var]], na.rm = TRUE),
+            y = -10,
+            label = paste("Deletion Suppressors =", sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE))
+          )
         ),
+        sd_band_values = sd_band,
         shape = 3,
         size = 0.1
       )
-
-      # Non-annotated version (_notext)
+      
+      # Non-Annotated Plot Configuration
       configs[[length(configs) + 1]] <- list(
-        df = df,
+        df = df_filtered,
         x_var = rank_var,
         y_var = zscore_var,
         plot_type = "scatter",
-        title = paste(plot_title_prefix, var, "above", sd_band, "SD No Annotations"),
+        title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD No Annotations"),
         sd_band = sd_band,
-        enhancer_label = NULL,
-        suppressor_label = NULL,
+        annotations = NULL,
+        sd_band_values = sd_band,
         shape = 3,
         size = 0.1
       )
     }
   }
-    
+  
+  #### 2. Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC ####
+  for (var in avg_zscore_vars) {
+    for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
+      
+      # Define x and y variables based on plot type
+      if (plot_type == "Avg_Zscore_vs_lm") {
+        x_var <- paste0("Avg_Zscore_", var)
+        y_var <- paste0("Z_lm_", var)
+        title_suffix <- paste("Avg Zscore vs lm", var)
+      } else if (plot_type == "Rank_Avg_Zscore_vs_lm") {
+        x_var <- paste0(var, "_Rank")
+        y_var <- paste0(var, "_Rank_lm")
+        title_suffix <- paste("Rank Avg Zscore vs lm", var)
+      }
+      
+      # Determine y-axis label
+      if (plot_type == "Avg_Zscore_vs_lm") {
+        y_label <- paste("Z lm", var)
+      } else {
+        y_label <- paste("Rank lm", var)
+      }
+      
+      # Determine correlation text (R-squared)
+      lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
+      r_squared <- summary(lm_fit)$r.squared
+      
+      # Plot Configuration
+      configs[[length(configs) + 1]] <- list(
+        df = df_filtered,
+        x_var = x_var,
+        y_var = y_var,
+        plot_type = "scatter",
+        title = title_suffix,
+        annotations = list(
+          list(
+            x = 0,
+            y = 0,
+            label = paste("R-squared =", round(r_squared, 2))
+          )
+        ),
+        sd_band_values = NULL,  # Not applicable
+        shape = 3,
+        size = 0.1,
+        add_smooth = TRUE,
+        lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
+        legend_position = "right"
+      )
+    }
+  }
+  
   return(configs)
 }
 
@@ -1043,15 +1088,15 @@ main <- function() {
     )
 
     message("Generating quality control plots")
-    # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
-    # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
-    # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
-    # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
-    # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
-    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
-    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
-    # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
-    # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
+    generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
+    generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
 
     # TODO: Originally this filtered L NA's
     # Let's try to avoid for now since stats have already been calculated