From 2313c483580348b5ca907432261a7f39249df89c Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 11 Sep 2024 22:07:51 -0400 Subject: [PATCH] Add more aes to generate_and_save_plots() --- .../apps/r/calculate_interaction_zscores.R | 226 ++++++++++-------- 1 file changed, 122 insertions(+), 104 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index e93f4666..184a28ca 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -339,70 +339,71 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c } generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { - + message("Generating html and pdf plots for: ", file_name, ".pdf|html") - + plots <- lapply(plot_configs, function(config) { - - # Log configuration details - message("title: ", config$title) - message("plot_type: ", config$plot_type) - message("x_var: ", config$x_var) - message("y_var: ", config$y_var) - message("error_bar: ", config$error_bar) - + + # Log details and setup df <- config$df - - # Build the aes mapping depending on whether y_var is present - aes_mapping <- if (is.null(config$y_var)) { - aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var))) - } else { - aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))) - } - - # Initialize the plot with ggplot + aes_mapping <- + if (is.null(config$y_var)) + aes(x = !!sym(config$x_var), color = as.factor(!!sym(config$color_var))) + else + aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))) + plot <- ggplot(df, aes_mapping) - - # Handle plot types explicitly - if (config$plot_type == "scatter") { - plot <- plot + geom_point(shape = 3) - - # Add geom_smooth only if specified - if (!is.null(config$add_smooth) && config$add_smooth) { - plot <- plot + geom_smooth(method = "lm", se = FALSE) - } - - } else if (config$plot_type == "rank") { - plot <- plot + geom_point(size = 0.1, shape = 3) - - if (!is.null(config$sd_band)) { - for (i in seq_len(config$sd_band)) { - plot <- plot + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-i, i), color = "gray") - } - } - - if (!is.null(config$enhancer_label)) { - plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + - annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) - } - } else if (config$plot_type == "correlation") { - plot <- plot + geom_point(shape = 3, color = "gray70") + + + # Plot type handling + plot <- switch(config$plot_type, + + "scatter" = { + plot + geom_point(aes(ORF = ORF, Gene = Gene, !!sym(config$x_var) := !!sym(config$x_var)), + shape = config$shape %||% 3, size = config$size %||% 0.6) + + (if (!is.null(config$add_smooth) && config$add_smooth) + geom_smooth(method = "lm", se = FALSE) + else NULL) + + (if (!is.null(config$position) && config$position == "jitter") + geom_point(position = "jitter") + else NULL) + + # Use precalculated mean and sd for error bars + geom_errorbar(aes( + ymin = !!sym(paste0("mean_", config$y_var)) - !!sym(paste0("sd_", config$y_var)), + ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))), width = 0.1) + + geom_point(aes(y = !!sym(paste0("mean_", config$y_var))), size = 0.6) + }, + + "rank" = { + plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) + + (if (!is.null(config$sd_band)) + Reduce(`+`, lapply(seq_len(config$sd_band), function(i) { + list( + annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3), + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3), + geom_hline(yintercept = c(-i, i), color = "gray") + ) + })) else NULL) + + (if (!is.null(config$enhancer_label)) + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + else NULL) + + (if (!is.null(config$suppressor_label)) + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) + else NULL) + }, + + "correlation" = plot + geom_point(shape = config$shape %||% 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + - annotate("text", x = 0, y = 0, label = config$correlation_text) - } else if (config$plot_type == "box") { - plot <- plot + geom_boxplot() - } else if (config$plot_type == "density") { - plot <- plot + geom_density() - } else if (config$plot_type == "bar") { - plot <- plot + geom_bar() - } else { - plot <- plot + geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE) - } - - # Handle error bars if needed + annotate("text", x = 0, y = 0, label = config$correlation_text), + + "box" = plot + geom_boxplot(), + "density" = plot + geom_density(), + "bar" = plot + geom_bar(), + + # Default case (scatter with smooth line) + plot + geom_point(shape = config$shape %||% 3) + geom_smooth(method = "lm", se = FALSE) + ) + + # Error bars using pre-calculated mean and sd columns if (!is.null(config$error_bar) && config$error_bar) { y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) @@ -411,37 +412,30 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) } - - # Apply y-limits if provided - if (!is.null(config$ylim_vals)) { - plot <- plot + coord_cartesian(ylim = config$ylim_vals) - } - - # Apply titles, labels, and legends - plot <- plot + ggtitle(config$title) + - theme_publication(legend_position = if (!is.null(config$legend_position)) config$legend_position else "bottom") + + + # Y-limits and labels + plot <- plot + (if (!is.null(config$ylim_vals)) coord_cartesian(ylim = config$ylim_vals) else NULL) + + ggtitle(config$title) + + theme_publication(legend_position = config$legend_position %||% "bottom") + xlab(config$x_label %||% "") + ylab(config$y_label %||% "") - - # Add any annotations + + # Annotations if (!is.null(config$annotations)) { for (annotation in config$annotations) { plot <- plot + geom_text(aes(x = annotation$x, y = annotation$y, label = annotation$label)) } } - + return(plot) }) - # Save the plots as PDF + # Save plots to PDF and HTML pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) lapply(plots, print) dev.off() - # Convert ggplot to plotly for interactive HTML output plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))) - - # Combine plots in grid layout if applicable - combined_plot <- subplot(plotly_plots, nrows = if (!is.null(grid_layout)) grid_layout$nrow else length(plots), margin = 0.05) + combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05) saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) } @@ -492,7 +486,10 @@ generate_interaction_plot_configs <- function(df, variables) { ), x_breaks = unique(df$conc_num_factor), x_labels = unique(as.character(df$conc_num)), - x_label = unique(df$Drug[1]) + x_label = unique(df$Drug[1]), + shape = 3, + size = 0.6, + position = "jitter" ) # Add box plot configuration for this variable @@ -551,10 +548,11 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA suppressor_label = list( x = nrow(df) / 2, y = -10, label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) - ) + ), + shape = 3, + size = 0.1, + position = "jitter" ) - - return(configs) } # Non-annotated version (_notext) @@ -567,7 +565,10 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA title = paste(plot_title_prefix, var, "above", sd_band, "SD"), sd_band = sd_band, enhancer_label = NULL, # No annotations for _notext - suppressor_label = NULL # No annotations for _notext + suppressor_label = NULL, # No annotations for _notext + shape = 3, + size = 0.1, + position = "jitter" ) } @@ -590,6 +591,8 @@ generate_correlation_plot_configs <- function(df, variables) { title = paste("Avg Zscore vs lm", variable), color_var = "Overlap", correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2)), + shape = 3, + geom_smooth = TRUE, legend_position = "right" ) } @@ -626,7 +629,7 @@ main <- function() { # Remove rows with 0 values in L df_no_zeros <- df_na %>% filter(L > 0) - # Set some constants + # Save some constants max_conc <- max(df$conc_num_factor) l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 @@ -705,46 +708,56 @@ main <- function() { plot_type = "scatter", title = "Raw L vs K before quality control", color_var = "conc_num", + position = "jitter", legend_position = "right" ) ) + frequency_delta_bg_plots <- list( + list( + df = df_filtered_stats, + x_var = "delta_bg", + y_var = NULL, + plot_type = "density", + title = "Plate analysis by Drug Conc for Delta Background before quality control", + color_var = "conc_num", + x_label = "Delta Background", + y_label = "Density", + error_bar = FALSE, + legend_position = "right"), + list( + df = df_filtered_stats, + x_var = "delta_bg", + y_var = NULL, + plot_type = "bar", + title = "Plate analysis by Drug Conc for Delta Background before quality control", + color_var = "conc_num", + x_label = "Delta Background", + y_label = "Count", + error_bar = FALSE, + legend_position = "right") + ) + above_threshold_plots <- list( list( df = df_above_tolerance, x_var = "L", y_var = "K", plot_type = "scatter", - title = paste("Raw L vs K for strains above delta background threshold of", + title = paste("Raw L vs K for strains above Delta Background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"), color_var = "conc_num", + position = "jitter", annotations = list( x = l_half_median, y = k_half_median, - label = paste("Strains above delta background tolerance =", nrow(df_above_tolerance)) + label = paste("# strains above Delta Background tolerance =", nrow(df_above_tolerance)) ), error_bar = FALSE, legend_position = "right" ) ) - frequency_delta_bg_plots <- list( - list(df = df_filtered_stats, x_var = "delta_bg", y_var = NULL, plot_type = "density", - title = "Plate analysis by Drug Conc for delta background before quality control", - color_var = "conc_num", - x_label = "Delta Background", - y_label = "Density", - error_bar = FALSE, - legend_position = "right"), - list(df = df_filtered_stats, x_var = "delta_bg", y_var = NULL, plot_type = "bar", - title = "Plate analysis by Drug Conc for delta background before quality control", - color_var = "conc_num", - x_label = "Delta Background", - y_label = "Count", - error_bar = FALSE, - legend_position = "right") - ) - plate_analysis_plots <- list() for (var in summary_vars) { for (stage in c("before", "after")) { @@ -760,7 +773,9 @@ main <- function() { y_var = var, plot_type = "scatter", title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), - error_bar = TRUE, color_var = "conc_num") + error_bar = TRUE, + color_var = "conc_num", + position = "jitter") plate_analysis_plots <- append(plate_analysis_plots, list(config)) } @@ -797,7 +812,8 @@ main <- function() { plot_type = "scatter", title = paste("Plate analysis by Drug Conc for", var, "after quality control"), error_bar = TRUE, - color_var = "conc_num") + color_var = "conc_num", + position = "jitter") plate_analysis_no_zeros_plots <- append(plate_analysis_no_zeros_plots, list(config)) } @@ -827,6 +843,7 @@ main <- function() { plot_type = "scatter", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", + position = "jitter", legend_position = "right" ) ) @@ -839,14 +856,15 @@ main <- function() { plot_type = "scatter", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", + position = "jitter", legend_position = "right" ) ) message("Generating QC plots") generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) - generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) + generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)