From 2d3a0b5add5951ecb3a8e3a70825d858eda2e2de Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Thu, 12 Sep 2024 00:48:32 -0400 Subject: [PATCH] Add more plotting nuances --- .../apps/r/calculate_interaction_zscores.R | 173 +++++++++--------- 1 file changed, 82 insertions(+), 91 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 184a28ca..c24c504c 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -356,39 +356,52 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la # Plot type handling plot <- switch(config$plot_type, - "scatter" = { - plot + geom_point(aes(ORF = ORF, Gene = Gene, !!sym(config$x_var) := !!sym(config$x_var)), - shape = config$shape %||% 3, size = config$size %||% 0.6) + - (if (!is.null(config$add_smooth) && config$add_smooth) - geom_smooth(method = "lm", se = FALSE) - else NULL) + - (if (!is.null(config$position) && config$position == "jitter") - geom_point(position = "jitter") - else NULL) + - # Use precalculated mean and sd for error bars + plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) { + plot + geom_point(aes(ORF = ORF, Gene = Gene, delta_bg = delta_bg), config$shape %||% 3) + } else if (!is.null(config$gene_point) && config$gene_point) { + plot + geom_point(aes(ORF = ORF, Gene = Gene, Gene = Gene), shape = config$shape %||% 3, position = "jitter") + } else if (!is.null(config$position) && config$position == "jitter") { + plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2, position = "jitter") + } else { + plot + geom_point(shape = config$shape %||% 3, size = config$size %||% 0.2) + } + + if (!is.null(config$add_smooth) && config$add_smooth) { + plot <- plot + geom_smooth(method = "lm", se = FALSE) + } + + plot <- plot + geom_errorbar(aes( ymin = !!sym(paste0("mean_", config$y_var)) - !!sym(paste0("sd_", config$y_var)), - ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))), width = 0.1) + + ymax = !!sym(paste0("mean_", config$y_var)) + !!sym(paste0("sd_", config$y_var))), + width = 0.1) + geom_point(aes(y = !!sym(paste0("mean_", config$y_var))), size = 0.6) + + plot }, "rank" = { - plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) + - (if (!is.null(config$sd_band)) - Reduce(`+`, lapply(seq_len(config$sd_band), function(i) { - list( - annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3), - annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3), - geom_hline(yintercept = c(-i, i), color = "gray") - ) - })) else NULL) + - (if (!is.null(config$enhancer_label)) - annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) - else NULL) + - (if (!is.null(config$suppressor_label)) - annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) - else NULL) + plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3) + + if (!is.null(config$sd_band)) { + for (i in seq_len(config$sd_band)) { + plot <- plot + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + + geom_hline(yintercept = c(-i, i), color = "gray") + } + } + + if (!is.null(config$enhancer_label)) { + plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + } + + if (!is.null(config$suppressor_label)) { + plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) + } + + plot }, "correlation" = plot + geom_point(shape = config$shape %||% 3, color = "gray70") + @@ -397,43 +410,17 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la "box" = plot + geom_boxplot(), "density" = plot + geom_density(), - "bar" = plot + geom_bar(), - - # Default case (scatter with smooth line) - plot + geom_point(shape = config$shape %||% 3) + geom_smooth(method = "lm", se = FALSE) + "bar" = plot + geom_bar() ) - # Error bars using pre-calculated mean and sd columns - if (!is.null(config$error_bar) && config$error_bar) { - y_mean_col <- paste0("mean_", config$y_var) - y_sd_col <- paste0("sd_", config$y_var) - plot <- plot + geom_errorbar(aes( - ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + - geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) - } - - # Y-limits and labels - plot <- plot + (if (!is.null(config$ylim_vals)) coord_cartesian(ylim = config$ylim_vals) else NULL) + - ggtitle(config$title) + - theme_publication(legend_position = config$legend_position %||% "bottom") + - xlab(config$x_label %||% "") + ylab(config$y_label %||% "") - - # Annotations - if (!is.null(config$annotations)) { - for (annotation in config$annotations) { - plot <- plot + geom_text(aes(x = annotation$x, y = annotation$y, label = annotation$label)) - } - } - - return(plot) + plot }) - + # Save plots to PDF and HTML pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) lapply(plots, print) dev.off() - + plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))) combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05) saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) @@ -442,43 +429,39 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la generate_interaction_plot_configs <- function(df, variables) { configs <- list() - for (variable in variables) { - # Define the y-limits based on the variable being plotted - ylim_vals <- switch(variable, - "L" = c(-65, 65), - "K" = c(-65, 65), - "r" = c(-0.65, 0.65), - "AUC" = c(-6500, 6500) - ) + # Predefine y-limits and annotation y-values for each variable + variable_properties <- list( + "L" = list(ylim = c(-65, 65), annotations_y = c(45, 25, -25, -35, -45)), + "K" = list(ylim = c(-65, 65), annotations_y = c(45, 25, -25, -35, -45)), + "r" = list(ylim = c(-0.65, 0.65), annotations_y = c(0.45, 0.25, -0.25, -0.35, -0.45)), + "AUC" = list(ylim = c(-6500, 6500), annotations_y = c(4500, 2500, -2500, -3500, -4500)) + ) - # Dynamically generate the column names for standard deviation and delta + for (variable in variables) { + props <- variable_properties[[variable]] + + # Dynamically generate column names wt_sd_col <- paste0("WT_sd_", variable) delta_var <- paste0("Delta_", variable) z_shift <- paste0("Z_Shift_", variable) z_lm <- paste0("Z_lm_", variable) + lm_score <- paste0("lm_Score_", variable) # Precomputed lm score + r_squared <- paste0("r_squared_", variable) # Precomputed R^2 - # Set annotations for ZShift, Z lm Score, NG, DB, SM - annotations <- list( - list(x = 1, y = ifelse(variable == "L", 45, ifelse(variable == "K", 45, - ifelse(variable == "r", 0.45, 4500))), label = paste("ZShift =", round(df[[z_shift]], 2))), - list(x = 1, y = ifelse(variable == "L", 25, ifelse(variable == "K", 25, - ifelse(variable == "r", 0.25, 2500))), label = paste("lm ZScore =", round(df[[z_lm]], 2))), - list(x = 1, y = ifelse(variable == "L", -25, ifelse(variable == "K", -25, - ifelse(variable == "r", -0.25, -2500))), label = paste("NG =", df$NG)), - list(x = 1, y = ifelse(variable == "L", -35, ifelse(variable == "K", -35, - ifelse(variable == "r", -0.35, -3500))), label = paste("DB =", df$DB)), - list(x = 1, y = ifelse(variable == "L", -45, ifelse(variable == "K", -45, - ifelse(variable == "r", -0.45, -4500))), label = paste("SM =", df$SM)) - ) + # Create annotation list + annotation_labels <- c("ZShift =", "lm ZScore =", "NG =", "DB =", "SM =") + annotations <- lapply(seq_along(annotation_labels), function(i) { + list(x = 1, y = props$annotations_y[i], label = paste(annotation_labels[i], round(df[[c(z_shift, z_lm, "NG", "DB", "SM")[i]]], 2))) + }) - # Add scatter plot configuration for this variable - configs[[length(configs) + 1]] <- list( + # Create scatter plot configuration using precomputed lm scores + scatter_config <- list( df = df, x_var = "conc_num_factor", y_var = delta_var, plot_type = "scatter", title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]), - ylim_vals = ylim_vals, + ylim_vals = props$ylim, annotations = annotations, error_bar = list( ymin = 0 - (2 * df[[wt_sd_col]][1]), @@ -489,28 +472,36 @@ generate_interaction_plot_configs <- function(df, variables) { x_label = unique(df$Drug[1]), shape = 3, size = 0.6, - position = "jitter" + position = "jitter", + lm_line = list( + intercept = coef(lm(df[[delta_var]] ~ df$conc_num_factor))[1], # Intercept from lm model + slope = coef(lm(df[[delta_var]] ~ df$conc_num_factor))[2] # Slope from lm model + ) ) - # Add box plot configuration for this variable - configs[[length(configs) + 1]] <- list( + # Create box plot configuration for this variable + box_config <- list( df = df, x_var = "conc_num_factor", y_var = variable, plot_type = "box", title = sprintf("%s %s (Boxplot)", df$OrfRep[1], df$Gene[1]), - ylim_vals = ylim_vals, + ylim_vals = props$ylim, annotations = annotations, - error_bar = FALSE, # Boxplots typically don't need error bars + error_bar = FALSE, x_breaks = unique(df$conc_num_factor), x_labels = unique(as.character(df$conc_num)), x_label = unique(df$Drug[1]) ) + + # Append both scatter and box plot configurations + configs <- append(configs, list(scatter_config, box_config)) } return(configs) } + # Adjust missing values and calculate ranks adjust_missing_and_rank <- function(df, variables) { @@ -706,9 +697,10 @@ main <- function() { x_var = "L", y_var = "K", plot_type = "scatter", + delta_bg_point = TRUE, title = "Raw L vs K before quality control", color_var = "conc_num", - position = "jitter", + error_bar = FALSE, legend_position = "right" ) ) @@ -744,6 +736,7 @@ main <- function() { x_var = "L", y_var = "K", plot_type = "scatter", + delta_bg_point = TRUE, title = paste("Raw L vs K for strains above Delta Background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"), color_var = "conc_num", @@ -804,7 +797,6 @@ main <- function() { plate_analysis_no_zeros_plots <- list() for (var in summary_vars) { - config <- list( df = df_no_zeros_filtered_stats, x_var = "scan", @@ -820,9 +812,6 @@ main <- function() { plate_analysis_no_zeros_boxplots <- list() for (var in summary_vars) { - - - # Create the plot configuration config <- list( df = df_no_zeros_filtered_stats, x_var = "scan", @@ -841,6 +830,7 @@ main <- function() { x_var = "L", y_var = "K", plot_type = "scatter", + delta_bg_point = TRUE, title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", position = "jitter", @@ -854,6 +844,7 @@ main <- function() { x_var = "delta_bg", y_var = "K", plot_type = "scatter", + gene_point = TRUE, title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", position = "jitter",