From 320338316cf31367d7cea0acb60df3f32ea3c1be Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Fri, 4 Oct 2024 15:19:05 -0400 Subject: [PATCH] Add missing calculation columns for Delta_L error bars --- .../apps/r/calculate_interaction_zscores.R | 94 ++++++++++++------- qhtcp-workflow/qhtcp-workflow | 1 + 2 files changed, 59 insertions(+), 36 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 0adf5b5a..23efeb41 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -347,6 +347,25 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol }) %>% ungroup() + # For interaction plot error bars + delta_means <- calculations %>% + group_by(across(all_of(group_vars))) %>% + summarise( + mean_Delta_L = mean(Delta_L, na.rm = TRUE), + mean_Delta_K = mean(Delta_K, na.rm = TRUE), + mean_Delta_r = mean(Delta_r, na.rm = TRUE), + mean_Delta_AUC = mean(Delta_AUC, na.rm = TRUE), + sd_Delta_L = sd(Delta_L, na.rm = TRUE), + sd_Delta_K = sd(Delta_K, na.rm = TRUE), + sd_Delta_r = sd(Delta_r, na.rm = TRUE), + sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE), + + .groups = "drop" + ) + + calculations <- calculations %>% + left_join(delta_means, by = group_vars) + # Summary statistics for lm scores lm_means_sds <- calculations %>% summarise( @@ -462,6 +481,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC, Exp_L, Exp_K, Exp_r, Exp_AUC, Delta_L, Delta_K, Delta_r, Delta_AUC, + mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC, Zscore_L, Zscore_K, Zscore_r, Zscore_AUC ) @@ -548,8 +568,8 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) { # Print rows being filtered out if (nrow(out_of_bounds_df) > 0) { - message("Filtered out rows outside y-limits:") - print(out_of_bounds_df) + message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df)) + # print(out_of_bounds_df) } # Filter the valid data for plotting @@ -646,11 +666,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) { } # Convert ggplot to plotly for interactive version - plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) + # plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) # Store both static and interactive versions static_plots[[i]] <- plot - plotly_plots[[i]] <- plotly_plot + # plotly_plots[[i]] <- plotly_plot } # Print the plots in the current group to the PDF @@ -987,7 +1007,7 @@ generate_interaction_plot_configs <- function(df, type) { df = group_data, plot_type = "scatter", x_var = "conc_num_factor_factor", - y_var = var, + y_var = paste0("Delta_", var), x_label = unique(group_data$Drug)[1], title = paste(OrfRepTitle, Gene, num, sep = " "), coord_cartesian = y_limits, @@ -1019,10 +1039,11 @@ generate_interaction_plot_configs <- function(df, type) { } } + # Return plot configs return(list( - list(grid_layout = list(ncol = 2), plots = stats_plot_configs), # nrow will be calculated dynamically - list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs), # nrow will be calculated dynamically - list(grid_layout = list(ncol = 4), plots = delta_plot_configs) # nrow will be calculated dynamically + list(grid_layout = list(ncol = 2), plots = stats_plot_configs), + list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs), + list(grid_layout = list(ncol = 4), plots = delta_plot_configs) # nrow calculated dynamically )) } @@ -1464,7 +1485,8 @@ main <- function() { variables = c("L", "K", "r", "AUC"), group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor") )$df_with_stats - message("Calculating reference strain interaction scores") + + message("Calculating reference strain interaction scores") results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num")) df_calculations_reference <- results$calculations df_interactions_reference <- results$interactions @@ -1472,37 +1494,37 @@ main <- function() { write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - # message("Setting missing deletion values to the highest theoretical value at each drug conc for L") - # df_deletion <- df_na_stats %>% # formerly X2 - # filter(OrfRep != strain) %>% - # filter(!is.na(L)) %>% - # group_by(OrfRep, Gene, conc_num) %>% - # mutate( - # max_l_theoretical = max(max_L, na.rm = TRUE), - # L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), - # SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM), - # L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% - # ungroup() - - # message("Calculating deletion strain(s) summary statistics") - # df_deletion_stats <- calculate_summary_stats( - # df = df_deletion, - # variables = c("L", "K", "r", "AUC"), - # group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor") - # )$df_with_stats - - # message("Calculating deletion strain(s) interactions scores") - # results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) - # df_calculations <- results$calculations - # df_interactions <- results$interactions - # df_interactions_joined <- results$full_data - # write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - # write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) - message("Generating reference interaction plots") reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, "reference") generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) + message("Setting missing deletion values to the highest theoretical value at each drug conc for L") + df_deletion <- df_na_stats %>% # formerly X2 + filter(OrfRep != strain) %>% + filter(!is.na(L)) %>% + group_by(OrfRep, Gene, conc_num) %>% + mutate( + max_l_theoretical = max(max_L, na.rm = TRUE), + L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), + SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM), + L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% + ungroup() + + message("Calculating deletion strain(s) summary statistics") + df_deletion_stats <- calculate_summary_stats( + df = df_deletion, + variables = c("L", "K", "r", "AUC"), + group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor") + )$df_with_stats + + message("Calculating deletion strain(s) interactions scores") + results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) + df_calculations <- results$calculations + df_interactions <- results$interactions + df_interactions_joined <- results$full_data + write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) + write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) + message("Generating deletion interaction plots") deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, "deletion") generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) diff --git a/qhtcp-workflow/qhtcp-workflow b/qhtcp-workflow/qhtcp-workflow index 323ab6b3..d5919530 100755 --- a/qhtcp-workflow/qhtcp-workflow +++ b/qhtcp-workflow/qhtcp-workflow @@ -1443,6 +1443,7 @@ wrapper calculate_interaction_zscores # * Plate analysis error bars and some others will be slightly different # * Can be changed back but better to have plots reflect data, no? # * Dynamically generate axis limits based on data (if desired) +# * Parallelize interaction plotting # # INPUT #