Browse Source

Break generate_and_save_plots() into component functions

Bryan Roessler 7 tháng trước cách đây
mục cha
commit
5afc46c387
1 tập tin đã thay đổi với 121 bổ sung117 xóa
  1. 121 117
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 121 - 117
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -339,137 +339,140 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 }
 
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
-  
-  message("Generating html and pdf plots for: ", file_name, ".pdf|html")
-  
   plots <- lapply(plot_configs, function(config) {
-    
-    # Log details and setup
     df <- config$df
-    aes_mapping <-
-      if (is.null(config$y_var))
-        aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var)))
-      else
-        aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))
-        
-    plot <- ggplot(df, aes_mapping)
-    
-    # Plot type handling
-    plot <- switch(config$plot_type,
-      "scatter" = {
-        plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
-          plot + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), config$shape %||% 3)
-        } else if (!is.null(config$gene_point) && config$gene_point) {
-          plot + geom_point(aes(ORF = ORF, Gene = Gene, Gene = Gene), shape = config$shape %||% 3, position = "jitter")
-        } else if (!is.null(config$position) && config$position == "jitter") {
-          plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2, position = "jitter")
-        } else {
-          plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2)
-        }
-
-        if (!is.null(config$add_smooth) && config$add_smooth) {
-          if (!is.null(config$lm_line)) {
-            # Use precomputed linear model values if available
-            plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope)
-          } else {
-            # Fallback to dynamically calculating the smooth line
-            plot <- plot + geom_smooth(method = "lm", se = FALSE)
-          }
-        }
 
-        if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
-          plot <- plot + scale_x_continuous(
-            name = config$x_label,
-            breaks = config$x_breaks,
-            labels = config$x_labels
-          )
-        }
+    # Define aes mapping based on the presence of y_var
+    aes_mapping <- if (is.null(config$y_var)) {
+      aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var)))
+    } else {
+      aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))
+    }
 
-        if (!is.null(config$ylim_vals)) {
-          plot <- plot + scale_y_continuous(limits = config$ylim_vals)
-        }
-    
-        if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
-          plot <- plot +
-            geom_errorbar(aes(
-              ymin = !!sym(paste0("mean_", config$y_var)) - !!sym(paste0("sd_", config$y_var)),
-              ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))),
-              width = 0.1) +
-            geom_point(aes(y = !!sym(paste0("mean_", config$y_var))), size = 0.6)
-        }
+    plot <- ggplot(df, aes_mapping)
 
-        if (!is.null(config$coord_cartesian)) {
-          plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
-        }
-        plot
-      },
-      
-      "rank" = {
-        plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3)
-        
-        if (!is.null(config$sd_band)) {
-          for (i in seq_len(config$sd_band)) {
-            plot <- plot +
-              annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
-              annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
-              geom_hline(yintercept = c(-i, i), color = "gray")
-          }
-        }
-        
-        if (!is.null(config$enhancer_label)) {
-          plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label)
-        }
-        
-        if (!is.null(config$suppressor_label)) {
-          plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
-        }
-        plot
-      },
-      
-      "correlation" = {
-        plot <- plot + geom_point(shape = config$shape %||% 3, color = "gray70") +
-          geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "tomato3") +
-          annotate("text", x = config$annotate_position$x, y = config$annotate_position$y, label = config$correlation_text)
-        
-        if (!is.null(config$rect)) {
-          plot <- plot + geom_rect(aes(xmin = config$rect$xmin,
-            xmax = config$rect$xmax, ymin = config$rect$ymin, ymax = config$rect$ymax),
-            color = "grey20", size = 0.25, alpha = 0.1, fill = NA, inherit.aes = FALSE)
-        }
-        plot
-      },
-      
-      "box" = {
-        plot <- plot + geom_boxplot()
-        if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
-          plot <- plot + scale_x_discrete(
-            name = config$x_label,
-            breaks = config$x_breaks,
-            labels = config$x_labels
-          )
-        }
-        if (!is.null(config$coord_cartesian)) {
-          plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
-        }
-        plot
-      },
+    # Use appropriate helper function based on plot type
+    plot <- switch(config$plot_type,
+      "scatter" = generate_scatter_plot(plot, config),
+      "rank" = generate_rank_plot(plot, config),
+      "correlation" = generate_correlation_plot(plot, config),
+      "box" = generate_box_plot(plot, config),
       "density" = plot + geom_density(),
-      "bar" = plot + geom_bar()
+      "bar" = plot + geom_bar(),
+      plot  # default case if no type matches
     )
-    
-    plot
+
+    return(plot)
   })
-  
-  # Save plots to PDF and HTML
+
+  # Save plots to file (PDF and HTML)
+  save_plots(plots, output_dir, file_name, grid_layout)
+}
+
+save_plots <- function(plots, output_dir, file_name, grid_layout = NULL) {
+  # PDF saving logic
   pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
   lapply(plots, print)
   dev.off()
-  
+
+  # HTML saving logic
   plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))))
   combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05)
   saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
 }
 
+generate_scatter_plot <- function(plot, config) {
+  plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
+    plot + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = config$shape %||% 3)
+  } else if (!is.null(config$gene_point) && config$gene_point) {
+    plot + geom_point(aes(ORF = ORF, Gene = Gene), shape = config$shape %||% 3, position = "jitter")
+  } else if (!is.null(config$position) && config$position == "jitter") {
+    plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2, position = "jitter")
+  } else {
+    plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2)
+  }
+
+  if (!is.null(config$add_smooth) && config$add_smooth) {
+    if (!is.null(config$lm_line)) {
+      plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope)
+    } else {
+      plot <- plot + geom_smooth(method = "lm", se = FALSE)
+    }
+  }
+
+  if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
+    plot <- plot + scale_x_continuous(
+      name = config$x_label,
+      breaks = config$x_breaks,
+      labels = config$x_labels
+    )
+  }
+
+  if (!is.null(config$ylim_vals)) {
+    plot <- plot + scale_y_continuous(limits = config$ylim_vals)
+  }
+
+  if (!is.null(config$coord_cartesian)) {
+    plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
+  }
+  
+  return(plot)
+}
+
+generate_rank_plot <- function(plot, config) {
+  plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3)
+  
+  if (!is.null(config$sd_band)) {
+    for (i in seq_len(config$sd_band)) {
+      plot <- plot +
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
+        geom_hline(yintercept = c(-i, i), color = "gray")
+    }
+  }
+
+  if (!is.null(config$enhancer_label)) {
+    plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label)
+  }
+
+  if (!is.null(config$suppressor_label)) {
+    plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
+  }
+
+  return(plot)
+}
+
+generate_correlation_plot <- function(plot, config) {
+  plot <- plot + geom_point(shape = config$shape %||% 3, color = "gray70") +
+    geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "tomato3") +
+    annotate("text", x = config$annotate_position$x, y = config$annotate_position$y, label = config$correlation_text)
+  
+  if (!is.null(config$rect)) {
+    plot <- plot + geom_rect(aes(xmin = config$rect$xmin, xmax = config$rect$xmax, ymin = config$rect$ymin, ymax = config$rect$ymax),
+      color = "grey20", size = 0.25, alpha = 0.1, fill = NA, inherit.aes = FALSE)
+  }
+
+  return(plot)
+}
+
+generate_box_plot <- function(plot, config) {
+  plot <- plot + geom_boxplot()
+  
+  if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
+    plot <- plot + scale_x_discrete(
+      name = config$x_label,
+      breaks = config$x_breaks,
+      labels = config$x_labels
+    )
+  }
+
+  if (!is.null(config$coord_cartesian)) {
+    plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
+  }
+
+  return(plot)
+}
+
 generate_interaction_plot_configs <- function(df, variables) {
   configs <- list()
 
@@ -684,7 +687,7 @@ main <- function() {
 
     message("Calculating summary statistics before quality control")
     ss <- calculate_summary_stats(df, summary_vars, group_vars = group_vars)
-    df_ss <- ss$summary_stats
+    # df_ss <- ss$summary_stats
     df_stats <- ss$df_with_stats
     df_filtered_stats <- df_stats %>%
       {
@@ -921,7 +924,8 @@ main <- function() {
     generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
 
     # Clean up
-    rm(df, df_above_tolerance, df_no_zeros)
+    rm(df, df_above_tolerance, df_no_zeros, df_no_zeros_stats, df_no_zeros_filtered_stats, ss)
+    gc()
 
     # TODO: Originally this filtered L NA's
     # Let's try to avoid for now since stats have already been calculated