From 5afc46c3872989d252f694b25c4560dd810b2065 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Thu, 12 Sep 2024 04:03:10 -0400 Subject: [PATCH] Break generate_and_save_plots() into component functions --- .../apps/r/calculate_interaction_zscores.R | 240 +++++++++--------- 1 file changed, 122 insertions(+), 118 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 7dc74065..3f493767 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -339,137 +339,140 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c } generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { - - message("Generating html and pdf plots for: ", file_name, ".pdf|html") - plots <- lapply(plot_configs, function(config) { - - # Log details and setup df <- config$df - aes_mapping <- - if (is.null(config$y_var)) - aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var))) - else - aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))) - + + # Define aes mapping based on the presence of y_var + aes_mapping <- if (is.null(config$y_var)) { + aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var))) + } else { + aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))) + } + plot <- ggplot(df, aes_mapping) - - # Plot type handling + + # Use appropriate helper function based on plot type plot <- switch(config$plot_type, - "scatter" = { - plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) { - plot + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), config$shape %||% 3) - } else if (!is.null(config$gene_point) && config$gene_point) { - plot + geom_point(aes(ORF = ORF, Gene = Gene, Gene = Gene), shape = config$shape %||% 3, position = "jitter") - } else if (!is.null(config$position) && config$position == "jitter") { - plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2, position = "jitter") - } else { - plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2) - } - - if (!is.null(config$add_smooth) && config$add_smooth) { - if (!is.null(config$lm_line)) { - # Use precomputed linear model values if available - plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope) - } else { - # Fallback to dynamically calculating the smooth line - plot <- plot + geom_smooth(method = "lm", se = FALSE) - } - } - - if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { - plot <- plot + scale_x_continuous( - name = config$x_label, - breaks = config$x_breaks, - labels = config$x_labels - ) - } - - if (!is.null(config$ylim_vals)) { - plot <- plot + scale_y_continuous(limits = config$ylim_vals) - } - - if (!is.null(config$delta_bg_point) && config$delta_bg_point) { - plot <- plot + - geom_errorbar(aes( - ymin = !!sym(paste0("mean_", config$y_var)) - !!sym(paste0("sd_", config$y_var)), - ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))), - width = 0.1) + - geom_point(aes(y = !!sym(paste0("mean_", config$y_var))), size = 0.6) - } - - if (!is.null(config$coord_cartesian)) { - plot <- plot + coord_cartesian(ylim = config$coord_cartesian) - } - plot - }, - - "rank" = { - plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) - - if (!is.null(config$sd_band)) { - for (i in seq_len(config$sd_band)) { - plot <- plot + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-i, i), color = "gray") - } - } - - if (!is.null(config$enhancer_label)) { - plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) - } - - if (!is.null(config$suppressor_label)) { - plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) - } - plot - }, - - "correlation" = { - plot <- plot + geom_point(shape = config$shape %||% 3, color = "gray70") + - geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "tomato3") + - annotate("text", x = config$annotate_position$x, y = config$annotate_position$y, label = config$correlation_text) - - if (!is.null(config$rect)) { - plot <- plot + geom_rect(aes(xmin = config$rect$xmin, - xmax = config$rect$xmax, ymin = config$rect$ymin, ymax = config$rect$ymax), - color = "grey20", size = 0.25, alpha = 0.1, fill = NA, inherit.aes = FALSE) - } - plot - }, - - "box" = { - plot <- plot + geom_boxplot() - if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { - plot <- plot + scale_x_discrete( - name = config$x_label, - breaks = config$x_breaks, - labels = config$x_labels - ) - } - if (!is.null(config$coord_cartesian)) { - plot <- plot + coord_cartesian(ylim = config$coord_cartesian) - } - plot - }, + "scatter" = generate_scatter_plot(plot, config), + "rank" = generate_rank_plot(plot, config), + "correlation" = generate_correlation_plot(plot, config), + "box" = generate_box_plot(plot, config), "density" = plot + geom_density(), - "bar" = plot + geom_bar() + "bar" = plot + geom_bar(), + plot # default case if no type matches ) - - plot + + return(plot) }) - - # Save plots to PDF and HTML + + # Save plots to file (PDF and HTML) + save_plots(plots, output_dir, file_name, grid_layout) +} + +save_plots <- function(plots, output_dir, file_name, grid_layout = NULL) { + # PDF saving logic pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) lapply(plots, print) dev.off() - + + # HTML saving logic plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))) combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05) saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) } +generate_scatter_plot <- function(plot, config) { + plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) { + plot + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), shape = config$shape %||% 3) + } else if (!is.null(config$gene_point) && config$gene_point) { + plot + geom_point(aes(ORF = ORF, Gene = Gene), shape = config$shape %||% 3, position = "jitter") + } else if (!is.null(config$position) && config$position == "jitter") { + plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2, position = "jitter") + } else { + plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2) + } + + if (!is.null(config$add_smooth) && config$add_smooth) { + if (!is.null(config$lm_line)) { + plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope) + } else { + plot <- plot + geom_smooth(method = "lm", se = FALSE) + } + } + + if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { + plot <- plot + scale_x_continuous( + name = config$x_label, + breaks = config$x_breaks, + labels = config$x_labels + ) + } + + if (!is.null(config$ylim_vals)) { + plot <- plot + scale_y_continuous(limits = config$ylim_vals) + } + + if (!is.null(config$coord_cartesian)) { + plot <- plot + coord_cartesian(ylim = config$coord_cartesian) + } + + return(plot) +} + +generate_rank_plot <- function(plot, config) { + plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) + + if (!is.null(config$sd_band)) { + for (i in seq_len(config$sd_band)) { + plot <- plot + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + + geom_hline(yintercept = c(-i, i), color = "gray") + } + } + + if (!is.null(config$enhancer_label)) { + plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + } + + if (!is.null(config$suppressor_label)) { + plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) + } + + return(plot) +} + +generate_correlation_plot <- function(plot, config) { + plot <- plot + geom_point(shape = config$shape %||% 3, color = "gray70") + + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "tomato3") + + annotate("text", x = config$annotate_position$x, y = config$annotate_position$y, label = config$correlation_text) + + if (!is.null(config$rect)) { + plot <- plot + geom_rect(aes(xmin = config$rect$xmin, xmax = config$rect$xmax, ymin = config$rect$ymin, ymax = config$rect$ymax), + color = "grey20", size = 0.25, alpha = 0.1, fill = NA, inherit.aes = FALSE) + } + + return(plot) +} + +generate_box_plot <- function(plot, config) { + plot <- plot + geom_boxplot() + + if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { + plot <- plot + scale_x_discrete( + name = config$x_label, + breaks = config$x_breaks, + labels = config$x_labels + ) + } + + if (!is.null(config$coord_cartesian)) { + plot <- plot + coord_cartesian(ylim = config$coord_cartesian) + } + + return(plot) +} + generate_interaction_plot_configs <- function(df, variables) { configs <- list() @@ -684,7 +687,7 @@ main <- function() { message("Calculating summary statistics before quality control") ss <- calculate_summary_stats(df, summary_vars, group_vars = group_vars) - df_ss <- ss$summary_stats + # df_ss <- ss$summary_stats df_stats <- ss$df_with_stats df_filtered_stats <- df_stats %>% { @@ -921,7 +924,8 @@ main <- function() { generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # Clean up - rm(df, df_above_tolerance, df_no_zeros) + rm(df, df_above_tolerance, df_no_zeros, df_no_zeros_stats, df_no_zeros_filtered_stats, ss) + gc() # TODO: Originally this filtered L NA's # Let's try to avoid for now since stats have already been calculated