From c6fec56630ea2f4c7250f2d440c4989c9fa7b7ff Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 6 Oct 2024 17:47:18 -0400 Subject: [PATCH] Move y-limit filtering from generate_and_save_plots to generate_interaction_plot_configs --- .../apps/r/calculate_interaction_zscores.R | 143 +++++++++--------- 1 file changed, 70 insertions(+), 73 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index d859c8b6..b9777440 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -525,36 +525,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width config <- group$plots[[i]] df <- config$df - # Filter and debug out-of-bounds data - if (!is.null(config$ylim_vals)) { - out_of_bounds <- df %>% - filter( - is.na(.data[[config$y_var]]) | - .data[[config$y_var]] < config$ylim_vals[1] | - .data[[config$y_var]] > config$ylim_vals[2] - ) - if (nrow(out_of_bounds) > 0) { - message("Filtered ", nrow(out_of_bounds), " row(s) from '", config$title, "' because ", config$y_var, - " is outside of y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]:") - # print(out_of_bounds %>% select(OrfRep, Gene, num, Drug, scan, Plate, Row, Col, conc_num, all_of(config$y_var)), width = 1000) - } - df <- df %>% - filter( - !is.na(.data[[config$y_var]]) & - .data[[config$y_var]] >= config$ylim_vals[1] & - .data[[config$y_var]] <= config$ylim_vals[2] - ) - } - # Filter NAs if (!is.null(config$filter_na) && config$filter_na) { df <- df %>% filter(!is.na(.data[[config$y_var]])) } + # TODO for now skip all NA plots NA data + # Eventually add to own or filter_na block so we can handle selectively if (nrow(df) == 0) { - message("No data available after filtering for plot ", config$title) - next # Skip this plot if no data is available + message("Insufficient data for plot:", config$title) + next # skip plot if insufficient data is available } aes_mapping <- if (config$plot_type == "bar") { @@ -599,10 +580,10 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label) if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian) - plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) + #plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) static_plots[[i]] <- plot - plotly_plots[[i]] <- plotly_plot + #plotly_plots[[i]] <- plotly_plot } grid_layout <- group$grid_layout @@ -624,16 +605,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) } - tryCatch({ - grid.arrange( - grobs = static_plots, - ncol = grid_layout$ncol, - nrow = grid_layout$nrow - ) - }, error = function(e) { - message("Error in grid.arrange: ", e$message) - print(static_plots) - }) + grid.arrange( + grobs = static_plots, + ncol = grid_layout$ncol, + nrow = grid_layout$nrow + ) } else { for (plot in static_plots) { @@ -644,15 +620,14 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width dev.off() - out_html_file <- file.path(out_dir, paste0(filename, ".html")) - message("Saving combined HTML file: ", out_html_file) - htmltools::save_html( - htmltools::tagList(plotly_plots), - file = out_html_file - ) + # out_html_file <- file.path(out_dir, paste0(filename, ".html")) + # message("Saving combined HTML file: ", out_html_file) + # htmltools::save_html( + # htmltools::tagList(plotly_plots), + # file = out_html_file + # ) } - generate_scatter_plot <- function(plot, config) { # Define the points @@ -790,7 +765,7 @@ generate_scatter_plot <- function(plot, config) { ) } } else { - message("Skipping linear modeling line due to y-values outside of limits.") + message("Skipping linear regression line due to y-values outside of limits") } } else { # If no y-limits are provided, proceed with the annotation @@ -805,7 +780,7 @@ generate_scatter_plot <- function(plot, config) { ) } } else { - message("Skipping linear modeling line due to missing or invalid values.") + message("Skipping linear regression line due to missing or invalid values") } } @@ -984,7 +959,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) ) plot_config$position <- "jitter" - # Cannot figure out how to place these properly for discrete x-axis so let's be hacky annotations <- list( list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = " NG =", size = 4), list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = " DB =", size = 4), @@ -1045,26 +1019,50 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) for (var in names(delta_limits_map)) { y_limits <- delta_limits_map[[var]] y_span <- y_limits[2] - y_limits[1] + y_var_name <- paste0("Delta_", var) - WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0) - Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2) - Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2) - R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2) + # Anti-filter to select out-of-bounds rows + out_of_bounds <- group_data %>% + filter(is.na(.data[[y_var_name]]) | + .data[[y_var_name]] < y_limits[1] | + .data[[y_var_name]] > y_limits[2]) + + if (nrow(out_of_bounds) > 0) { + message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]", + nrow(out_of_bounds), OrfRepTitle, y_var_name, y_limits[1], y_limits[2] + )) + } - NG_value <- first(group_data$NG, default = 0) - DB_value <- first(group_data$DB, default = 0) - SM_value <- first(group_data$SM, default = 0) + # Do the actual filtering + group_data_filtered <- group_data %>% + filter(!is.na(.data[[y_var_name]]) & + .data[[y_var_name]] >= y_limits[1] & + .data[[y_var_name]] <= y_limits[2]) + if (nrow(group_data_filtered) == 0) { + message("Insufficient data for plot: ", OrfRepTitle, " ", var) + next # skip plot if insufficient data is available + } + + WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0) + Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2) + Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2) + R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2) + + NG_value <- first(group_data_filtered$NG, default = 0) + DB_value <- first(group_data_filtered$DB, default = 0) + SM_value <- first(group_data_filtered$SM, default = 0) + lm_intercept_col <- paste0("lm_intercept_", var) lm_slope_col <- paste0("lm_slope_", var) - lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0) - lm_slope_value <- first(group_data[[lm_slope_col]], default = 0) - + lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0) + lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0) + plot_config <- list( - df = group_data, + df = group_data_filtered, plot_type = "scatter", x_var = "conc_num_factor_factor", - y_var = paste0("Delta_", var), + y_var = y_var_name, x_label = paste0("[", unique(df_summary$Drug)[1], "]"), shape = 16, title = paste(OrfRepTitle, Gene, sep = " "), @@ -1087,27 +1085,26 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) color = "gray70", linewidth = 0.5 ), - x_breaks = unique(group_data$conc_num_factor_factor), - x_labels = as.character(unique(group_data$conc_num)), + x_breaks = unique(group_data_filtered$conc_num_factor_factor), + x_labels = as.character(unique(group_data_filtered$conc_num)), ylim_vals = y_limits, - # filter_na = TRUE, lm_line = list( intercept = lm_intercept_value, slope = lm_slope_value, color = "blue", linewidth = 0.8, - x_min = min(as.numeric(group_data$conc_num_factor_factor)), - x_max = max(as.numeric(group_data$conc_num_factor_factor)) + x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)), + x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor)) ) ) delta_plot_configs <- append(delta_plot_configs, list(plot_config)) } } - + # Group delta plots in chunks of 12 per page chunk_size <- 12 delta_plot_chunks <- split(delta_plot_configs, ceiling(seq_along(delta_plot_configs) / chunk_size)) - + return(c( list(list(grid_layout = list(ncol = 2), plots = stats_plot_configs)), list(list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs)), @@ -1587,16 +1584,16 @@ main <- function() { group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") )$df_with_stats - # message("Calculating reference strain interaction scores") - # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") - # df_reference_interactions_joined <- reference_results$full_data - # df_reference_interactions <- reference_results$interactions - # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) - # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) + message("Calculating reference strain interaction scores") + reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") + df_reference_interactions_joined <- reference_results$full_data + df_reference_interactions <- reference_results$interactions + write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) + write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - # message("Generating reference interaction plots") - # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") - # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) + message("Generating reference interaction plots") + reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") + generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) message("Setting missing deletion values to the highest theoretical value at each drug conc for L") df_deletion <- df_na_stats %>% # formerly X2