From 08bf4946e04388784a0726ac6ab32f97fec443dc Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 5 Oct 2024 17:08:01 -0400 Subject: [PATCH] More gracefully handle missing plot configs --- .../apps/r/calculate_interaction_zscores.R | 101 ++++++++---------- 1 file changed, 44 insertions(+), 57 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 8242b93b..e21b876f 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -7,6 +7,7 @@ suppressMessages({ library("rlang") library("ggthemes") library("data.table") + library("grid") library("gridExtra") library("future") library("furrr") @@ -506,31 +507,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width grid_layout <- group$grid_layout plots <- group$plots - # Only handle grid layout if it exists - if (!is.null(grid_layout)) { - # Set grid_ncol to 1 if not specified - if (is.null(grid_layout$ncol)) { - grid_layout$ncol <- 1 - } - - # If ncol is set but nrow is not, calculate nrow dynamically based on num_plots - if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) { - num_plots <- length(plots) - nrow <- ceiling(num_plots / grid_layout$ncol) - # message("No nrow provided, automatically using nrow = ", nrow) - grid_layout$nrow <- nrow - } - - # Fill missing spots with nullGrob() if necessary - total_spots <- grid_layout$nrow * grid_layout$ncol - num_plots <- length(plots) - - if (num_plots < total_spots) { - message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") - plots <- c(plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) - } - } - for (i in seq_along(plots)) { config <- plots[[i]] df <- config$df @@ -546,9 +522,9 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width # Print rows being filtered out if (nrow(out_of_bounds_df) > 0) { - message("Filtered: ", config$title, "using y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]") + message("Filtered: ", config$title, " using y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]") message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df)) - # print(out_of_bounds_df) + print(out_of_bounds_df) } df <- df %>% @@ -582,15 +558,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width } } - # Create a null plot with a "No data" message if no rows remain - # if (nrow(df) == 0) { - # plot <- ggplot() + - # geom_text(aes(0.5, 0.5), label = "No data available", size = 5) + - # theme_void() + ggtitle(config$title) - # } else { - # plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position) - # } - plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position) # Add appropriate plot layer or helper function based on plot type @@ -718,18 +685,37 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width } # Print the plots in the current group to the PDF - if (is.null(grid_layout)) { - # Print each plot individually on separate pages if no grid layout is specified - for (plot in static_plots) { - print(plot) + if (!is.null(grid_layout)) { + # Set grid_ncol to 1 if not specified + if (is.null(grid_layout$ncol)) { + grid_layout$ncol <- 1 + } + + # If ncol is set but nrow is not, calculate nrow dynamically based on num_plots + if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) { + num_plots <- length(static_plots) + nrow <- ceiling(num_plots / grid_layout$ncol) + # message("No nrow provided, automatically using nrow = ", nrow) + grid_layout$nrow <- nrow + } + + total_spots <- grid_layout$nrow * grid_layout$ncol + num_plots <- length(static_plots) + + if (num_plots < total_spots) { + message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") + static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) } - } else { - # Arrange plots in grid layout on a single page grid.arrange( grobs = static_plots, ncol = grid_layout$ncol, nrow = grid_layout$nrow ) + } else { + # Print individual plots on separate pages if no grid layout + for (plot in static_plots) { + print(plot) + } } } @@ -917,7 +903,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df return(list(plots = plot_configs)) } -generate_interaction_plot_configs <- function(df_summary, df_interaction, type) { +generate_interaction_plot_configs <- function(df_summary, df_interactions, type) { # Define the y-limits for the plots limits_map <- list( @@ -982,7 +968,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) } plot_config$annotations <- annotations - stats_plot_configs <- append(stats_plot_configs, list(plot_config)) } else if (plot_type == "box") { @@ -995,12 +980,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) } # Delta interaction plots - if (type == "reference") { - group_vars <- c("OrfRep", "Gene", "num") - } else if (type == "deletion") { - group_vars <- c("OrfRep", "Gene") - } - delta_limits_map <- list( L = c(-60, 60), K = c(-60, 60), @@ -1008,19 +987,26 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) AUC = c(-6000, 6000) ) - grouped_data <- df_interaction %>% + # Select the data grouping by data type + if (type == "reference") { + group_vars <- c("OrfRep", "Gene", "num") + } else if (type == "deletion") { + group_vars <- c("OrfRep", "Gene") + } + + grouped_data <- df_interactions %>% group_by(across(all_of(group_vars))) %>% group_split() for (group_data in grouped_data) { + # Build the plot title OrfRep <- first(group_data$OrfRep) Gene <- first(group_data$Gene) - num <- if ("num" %in% names(group_data)) first(group_data$num) else "" - if (type == "reference") { - OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_") + num <- if ("num" %in% names(group_data)) first(group_data$num) else "" + OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_") } else if (type == "deletion") { - OrfRepTitle <- OrfRep + OrfRepTitle <- OrfRep } for (var in names(delta_limits_map)) { @@ -1049,7 +1035,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) x_label = paste0("[", unique(df_summary$Drug)[1], "]"), shape = 16, title = paste(OrfRepTitle, Gene, sep = " "), - title_size = rel(1.3), + title_size = rel(1.4), coord_cartesian = y_limits, annotations = list( list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))), @@ -1089,6 +1075,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) chunk_size <- 12 delta_plot_chunks <- split(delta_plot_configs, ceiling(seq_along(delta_plot_configs) / chunk_size)) + # TODO, only return first page of plots for testing, remove this later return(c( list(list(grid_layout = list(ncol = 2), plots = stats_plot_configs)), list(list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs)), @@ -1575,7 +1562,7 @@ main <- function() { df_deletion <- df_na_stats %>% # formerly X2 filter(OrfRep != strain) %>% filter(!is.na(L)) %>% - group_by(OrfRep, Gene, conc_num) %>% + group_by(OrfRep, Gene, conc_num, conc_num_factor_factor) %>% mutate( max_l_theoretical = max(max_L, na.rm = TRUE), L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),