From a309130c399c1ab9ef5320d2148c3e31a0a9dfda Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 5 Oct 2024 20:44:25 -0400 Subject: [PATCH] Correct grouping for deletion WT summarization --- .../apps/r/calculate_interaction_zscores.R | 66 +++++++++++-------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index e21b876f..2cd527af 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -204,14 +204,22 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) { +calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) { max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE) total_conc_num <- length(unique(df$conc_num)) + if (type == "reference") { + bg_group_vars <- c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor") + group_vars <- c("OrfRep", "Gene", "num", "Drug") + } else if (type == "deletion") { + bg_group_vars <- c("Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor") + group_vars <- c("OrfRep", "Gene", "Drug") + } + # Calculate WT statistics from df_bg wt_stats <- df_bg %>% - group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>% + group_by(across(all_of(bg_group_vars))) %>% summarise( WT_L = mean(mean_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE), @@ -224,6 +232,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) + # Join WT statistics to df + df <- df %>% + left_join(wt_stats, by = bg_group_vars) + # Compute mean values at zero concentration mean_zeroes <- df %>% filter(conc_num == 0) %>% @@ -236,9 +248,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) - # Join WT statistics to df df <- df %>% - left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>% left_join(mean_zeroes, by = c(group_vars)) # Calculate Raw Shifts and Z Shifts @@ -702,10 +712,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width total_spots <- grid_layout$nrow * grid_layout$ncol num_plots <- length(static_plots) - if (num_plots < total_spots) { - message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") - static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) - } + # if (num_plots < total_spots) { + # message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") + # static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) + # } + grid.arrange( grobs = static_plots, ncol = grid_layout$ncol, @@ -775,7 +786,6 @@ generate_scatter_plot <- function(plot, config) { linewidth = ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1) ) } - # Add SD Bands if specified if (!is.null(config$sd_band)) { @@ -1538,25 +1548,22 @@ main <- function() { .groups = "drop" ) - message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction - df_reference_interaction_stats <- calculate_summary_stats( - df = df_reference, - variables = c("L", "K", "r", "AUC"), - group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") - )$df_with_stats + # message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction + # df_reference_interaction_stats <- calculate_summary_stats( + # df = df_reference, + # variables = c("L", "K", "r", "AUC"), + # group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") + # )$df_with_stats - message("Calculating reference strain interaction scores") - reference_results <- calculate_interaction_scores(df_reference_interaction_stats, - df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) - df_reference_calculations <- reference_results$calculations - df_reference_interactions <- reference_results$interactions - df_reference_interactions_joined <- reference_results$full_data - write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) - write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) + # # message("Calculating reference strain interaction scores") + # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") + # df_reference_interactions_joined <- reference_results$full_data + # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) + # write.csv(reference_results$interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") - generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) + # # message("Generating reference interaction plots") + # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") + # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) message("Setting missing deletion values to the highest theoretical value at each drug conc for L") df_deletion <- df_na_stats %>% # formerly X2 @@ -1578,13 +1585,14 @@ main <- function() { )$df_with_stats message("Calculating deletion strain(s) interactions scores") - deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) - df_calculations <- deletion_results$calculations + deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion") df_interactions <- deletion_results$interactions df_interactions_joined <- deletion_results$full_data - write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) + write.csv(deletion_results$calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) + print(df_interactions_joined, n = 20, width = 100000) + message("Generating deletion interaction plots") deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion") generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)