Переглянути джерело

Add more aes to generate_and_save_plots()

Bryan Roessler 8 місяців тому
батько
коміт
2313c48358
1 змінених файлів з 122 додано та 104 видалено
  1. 122 104
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 122 - 104
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -339,70 +339,71 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 }
 
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
-
+  
   message("Generating html and pdf plots for: ", file_name, ".pdf|html")
-
+  
   plots <- lapply(plot_configs, function(config) {
-
-    # Log configuration details
-    message("title: ", config$title)
-    message("plot_type: ", config$plot_type)
-    message("x_var: ", config$x_var)
-    message("y_var: ", config$y_var)
-    message("error_bar: ", config$error_bar)
-
+    
+    # Log details and setup
     df <- config$df
-
-    # Build the aes mapping depending on whether y_var is present
-    aes_mapping <- if (is.null(config$y_var)) {
-      aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var)))
-    } else {
-      aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))
-    }
-
-    # Initialize the plot with ggplot
+    aes_mapping <-
+      if (is.null(config$y_var))
+        aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var)))
+      else
+        aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))
+        
     plot <- ggplot(df, aes_mapping)
-
-    # Handle plot types explicitly
-    if (config$plot_type == "scatter") {
-      plot <- plot + geom_point(shape = 3)
-
-      # Add geom_smooth only if specified
-      if (!is.null(config$add_smooth) && config$add_smooth) {
-        plot <- plot + geom_smooth(method = "lm", se = FALSE)
-      }
-
-    } else if (config$plot_type == "rank") {
-      plot <- plot + geom_point(size = 0.1, shape = 3)
-
-      if (!is.null(config$sd_band)) {
-        for (i in seq_len(config$sd_band)) {
-          plot <- plot +
-            annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
-            annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
-            geom_hline(yintercept = c(-i, i), color = "gray")
-        }
-      }
-
-      if (!is.null(config$enhancer_label)) {
-        plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) +
-          annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
-      }
-    } else if (config$plot_type == "correlation") {
-      plot <- plot + geom_point(shape = 3, color = "gray70") +
+    
+    # Plot type handling
+    plot <- switch(config$plot_type,
+      
+      "scatter" = {
+        plot + geom_point(aes(ORF = ORF, Gene = Gene, !!sym(config$x_var) := !!sym(config$x_var)),
+          shape = config$shape %||% 3, size = config$size %||% 0.6) +
+          (if (!is.null(config$add_smooth) && config$add_smooth)
+            geom_smooth(method = "lm", se = FALSE)
+          else NULL) +
+          (if (!is.null(config$position) && config$position == "jitter")
+            geom_point(position = "jitter")
+          else NULL) +
+          # Use precalculated mean and sd for error bars
+          geom_errorbar(aes(
+            ymin = !!sym(paste0("mean_", config$y_var)) - !!sym(paste0("sd_", config$y_var)),
+            ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))), width = 0.1) +
+          geom_point(aes(y = !!sym(paste0("mean_", config$y_var))), size = 0.6)
+      },
+      
+      "rank" = {
+        plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) +
+          (if (!is.null(config$sd_band))
+            Reduce(`+`, lapply(seq_len(config$sd_band), function(i) {
+              list(
+                annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3),
+                annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3),
+                geom_hline(yintercept = c(-i, i), color = "gray")
+              )
+            })) else NULL) +
+          (if (!is.null(config$enhancer_label))
+            annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label)
+          else NULL) +
+          (if (!is.null(config$suppressor_label))
+            annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
+          else NULL)
+      },
+      
+      "correlation" = plot + geom_point(shape = config$shape %||% 3, color = "gray70") +
         geom_smooth(method = "lm", color = "tomato3") +
-        annotate("text", x = 0, y = 0, label = config$correlation_text)
-    } else if (config$plot_type == "box") {
-      plot <- plot + geom_boxplot()
-    } else if (config$plot_type == "density") {
-      plot <- plot + geom_density()
-    } else if (config$plot_type == "bar") {
-      plot <- plot + geom_bar()
-    } else {
-      plot <- plot + geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE)
-    }
-
-    # Handle error bars if needed
+        annotate("text", x = 0, y = 0, label = config$correlation_text),
+      
+      "box" = plot + geom_boxplot(),
+      "density" = plot + geom_density(),
+      "bar" = plot + geom_bar(),
+      
+      # Default case (scatter with smooth line)
+      plot + geom_point(shape = config$shape %||% 3) + geom_smooth(method = "lm", se = FALSE)
+    )
+    
+    # Error bars using pre-calculated mean and sd columns
     if (!is.null(config$error_bar) && config$error_bar) {
       y_mean_col <- paste0("mean_", config$y_var)
       y_sd_col <- paste0("sd_", config$y_var)
@@ -411,37 +412,30 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
         ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
         geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
     }
-
-    # Apply y-limits if provided
-    if (!is.null(config$ylim_vals)) {
-      plot <- plot + coord_cartesian(ylim = config$ylim_vals)
-    }
-
-    # Apply titles, labels, and legends
-    plot <- plot + ggtitle(config$title) +
-      theme_publication(legend_position = if (!is.null(config$legend_position)) config$legend_position else "bottom") +
+    
+    # Y-limits and labels
+    plot <- plot + (if (!is.null(config$ylim_vals)) coord_cartesian(ylim = config$ylim_vals) else NULL) +
+      ggtitle(config$title) +
+      theme_publication(legend_position = config$legend_position %||% "bottom") +
       xlab(config$x_label %||% "") + ylab(config$y_label %||% "")
-
-    # Add any annotations
+    
+    # Annotations
     if (!is.null(config$annotations)) {
       for (annotation in config$annotations) {
         plot <- plot + geom_text(aes(x = annotation$x, y = annotation$y, label = annotation$label))
       }
     }
-
+    
     return(plot)
   })
 
-  # Save the plots as PDF
+  # Save plots to PDF and HTML
   pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
   lapply(plots, print)
   dev.off()
 
-  # Convert ggplot to plotly for interactive HTML output
   plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))))
-
-  # Combine plots in grid layout if applicable
-  combined_plot <- subplot(plotly_plots, nrows = if (!is.null(grid_layout)) grid_layout$nrow else length(plots), margin = 0.05)
+  combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05)
   saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
 }
 
@@ -492,7 +486,10 @@ generate_interaction_plot_configs <- function(df, variables) {
       ),
       x_breaks = unique(df$conc_num_factor),
       x_labels = unique(as.character(df$conc_num)),
-      x_label = unique(df$Drug[1])
+      x_label = unique(df$Drug[1]),
+      shape = 3,
+      size = 0.6,
+      position = "jitter"
     )
 
     # Add box plot configuration for this variable
@@ -551,10 +548,11 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
       suppressor_label = list(
         x = nrow(df) / 2, y = -10,
         label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
-      )
+      ),
+      shape = 3,
+      size = 0.1,
+      position = "jitter"
     )
-
-    return(configs)
   }
   
   # Non-annotated version (_notext)
@@ -567,7 +565,10 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
       title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
       sd_band = sd_band,
       enhancer_label = NULL,  # No annotations for _notext
-      suppressor_label = NULL  # No annotations for _notext
+      suppressor_label = NULL,  # No annotations for _notext
+      shape = 3,
+      size = 0.1,
+      position = "jitter"
     )
   }
   
@@ -590,6 +591,8 @@ generate_correlation_plot_configs <- function(df, variables) {
       title = paste("Avg Zscore vs lm", variable),
       color_var = "Overlap",
       correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2)),
+      shape = 3,
+      geom_smooth = TRUE,
       legend_position = "right"
     )
   }
@@ -626,7 +629,7 @@ main <- function() {
     # Remove rows with 0 values in L
     df_no_zeros <- df_na %>% filter(L > 0)
     
-    # Set some constants
+    # Save some constants
     max_conc <- max(df$conc_num_factor)
     l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
     k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
@@ -705,46 +708,56 @@ main <- function() {
         plot_type = "scatter",
         title = "Raw L vs K before quality control",
         color_var = "conc_num",
+        position = "jitter",
         legend_position = "right"
       )
     )
 
+    frequency_delta_bg_plots <- list(
+      list(
+        df = df_filtered_stats,
+        x_var = "delta_bg",
+        y_var = NULL,
+        plot_type = "density",
+        title = "Plate analysis by Drug Conc for Delta Background before quality control",
+        color_var = "conc_num",
+        x_label = "Delta Background",
+        y_label = "Density",
+        error_bar = FALSE,
+        legend_position = "right"),
+      list(
+        df = df_filtered_stats,
+        x_var = "delta_bg",
+        y_var = NULL,
+        plot_type = "bar",
+        title = "Plate analysis by Drug Conc for Delta Background before quality control",
+        color_var = "conc_num",
+        x_label = "Delta Background",
+        y_label = "Count",
+        error_bar = FALSE,
+        legend_position = "right")
+    )
+
     above_threshold_plots <- list(
       list(
         df = df_above_tolerance,
         x_var = "L",
         y_var = "K",
         plot_type = "scatter",
-        title = paste("Raw L vs K for strains above delta background threshold of",
+        title = paste("Raw L vs K for strains above Delta Background threshold of",
           df_above_tolerance$delta_bg_tolerance[[1]], "or above"),
         color_var = "conc_num",
+        position = "jitter",
         annotations = list(
           x = l_half_median,
           y = k_half_median,
-          label = paste("Strains above delta background tolerance =", nrow(df_above_tolerance))
+          label = paste("# strains above Delta Background tolerance =", nrow(df_above_tolerance))
         ),
         error_bar = FALSE,
         legend_position = "right"
       )
     )
 
-    frequency_delta_bg_plots <- list(
-      list(df = df_filtered_stats, x_var = "delta_bg", y_var = NULL, plot_type = "density",
-        title = "Plate analysis by Drug Conc for delta background before quality control",
-        color_var = "conc_num",
-        x_label = "Delta Background",
-        y_label = "Density",
-        error_bar = FALSE,
-        legend_position = "right"),
-      list(df = df_filtered_stats, x_var = "delta_bg", y_var = NULL, plot_type = "bar",
-        title = "Plate analysis by Drug Conc for delta background before quality control",
-        color_var = "conc_num",
-        x_label = "Delta Background",
-        y_label = "Count",
-        error_bar = FALSE,
-        legend_position = "right")
-    )
-
     plate_analysis_plots <- list()
     for (var in summary_vars) {
       for (stage in c("before", "after")) {
@@ -760,7 +773,9 @@ main <- function() {
           y_var = var,
           plot_type = "scatter",
           title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
-          error_bar = TRUE, color_var = "conc_num")
+          error_bar = TRUE,
+          color_var = "conc_num",
+          position = "jitter")
 
         plate_analysis_plots <- append(plate_analysis_plots, list(config))
       }
@@ -797,7 +812,8 @@ main <- function() {
         plot_type = "scatter",
         title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
         error_bar = TRUE,
-        color_var = "conc_num")
+        color_var = "conc_num",
+        position = "jitter")
 
       plate_analysis_no_zeros_plots <- append(plate_analysis_no_zeros_plots, list(config))
     }
@@ -827,6 +843,7 @@ main <- function() {
         plot_type = "scatter",
         title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
         color_var = "conc_num",
+        position = "jitter",
         legend_position = "right"
       )
     )
@@ -839,14 +856,15 @@ main <- function() {
         plot_type = "scatter",
         title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
         color_var = "conc_num",
+        position = "jitter",
         legend_position = "right"
       )
     )
 
     message("Generating QC plots")
     generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
-    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
     generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
     generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
     generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
     generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)