From ddcc26050fa201c8eec8a18086918e96c31d0233 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 5 Oct 2024 14:24:53 -0400 Subject: [PATCH] Regroup calculations --- .../apps/r/calculate_interaction_zscores.R | 71 +++++++++---------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 97251756..98345509 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol # Calculate WT statistics from df_bg wt_stats <- df_bg %>% - group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>% + group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>% summarise( WT_L = mean(mean_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE), @@ -223,12 +223,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) - # Join WT stats back to df - df <- df %>% - left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor")) - # Compute mean values at zero concentration - mean_L_zero_df <- df %>% + mean_zeroes <- df %>% filter(conc_num == 0) %>% group_by(across(all_of(group_vars))) %>% summarise( @@ -239,10 +235,11 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) - # Join mean_L_zero_df to df + # Join WT statistics to df df <- df %>% - left_join(mean_L_zero_df, by = group_vars) - + left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>% + left_join(mean_zeroes, by = c(group_vars)) + # Calculate Raw Shifts and Z Shifts df <- df %>% mutate( @@ -257,7 +254,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol ) calculations <- df %>% - group_by(across(all_of(group_vars))) %>% + group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>% mutate( NG_sum = sum(NG, na.rm = TRUE), DB_sum = sum(DB, na.rm = TRUE), @@ -289,6 +286,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC ) %>% + ungroup() %>% # Ungroup before group_modify + group_by(across(all_of(group_vars))) %>% group_modify(~ { # Perform linear models only if there are enough unique conc_num_factor levels if (length(unique(.x$conc_num_factor)) > 1) { @@ -354,8 +353,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol left_join(delta_means_sds, by = group_vars) # Summary statistics for lm scores - lm_means_sds <- calculations %>% - summarise( + calculations <- calculations %>% + mutate( lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_sd_L = sd(lm_Score_L, na.rm = TRUE), lm_mean_K = mean(lm_Score_K, na.rm = TRUE), @@ -363,25 +362,9 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol lm_mean_r = mean(lm_Score_r, na.rm = TRUE), lm_sd_r = sd(lm_Score_r, na.rm = TRUE), lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), - lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE), - .groups = "drop" - ) - - # Add lm score means and standard deviations to calculations - calculations <- calculations %>% - mutate( - lm_mean_L = lm_means_sds$lm_mean_L, - lm_sd_L = lm_means_sds$lm_sd_L, - lm_mean_K = lm_means_sds$lm_mean_K, - lm_sd_K = lm_means_sds$lm_sd_K, - lm_mean_r = lm_means_sds$lm_mean_r, - lm_sd_r = lm_means_sds$lm_sd_r, - lm_mean_AUC = lm_means_sds$lm_mean_AUC, - lm_sd_AUC = lm_means_sds$lm_sd_AUC - ) - - # Calculate Z-lm scores - calculations <- calculations %>% + lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE) + ) %>% + # Calculate Z-lm scores mutate( Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K, @@ -534,7 +517,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) { num_plots <- length(plots) nrow <- ceiling(num_plots / grid_layout$ncol) - message("No nrow provided, automatically using nrow = ", nrow) + # message("No nrow provided, automatically using nrow = ", nrow) grid_layout$nrow <- nrow } @@ -563,11 +546,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width # Print rows being filtered out if (nrow(out_of_bounds_df) > 0) { + message("Filtered: ", config$title, "using y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]") message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df)) # print(out_of_bounds_df) } - # Filter the valid data for plotting df <- df %>% filter( !is.na(.data[[config$y_var]]) & @@ -599,6 +582,15 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width } } + # Create a null plot with a "No data" message if no rows remain + # if (nrow(df) == 0) { + # plot <- ggplot() + + # geom_text(aes(0.5, 0.5), label = "No data available", size = 5) + + # theme_void() + ggtitle(config$title) + # } else { + # plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position) + # } + plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position) # Add appropriate plot layer or helper function based on plot type @@ -665,7 +657,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width ymax = !!custom_ymax_expr ), color = config$error_bar_params$color, - linewidth = 0.1 + linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) ) } else { @@ -688,7 +680,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width ymax = .data[[y_mean_col]] + .data[[y_sd_col]] ), color = config$error_bar_params$color, - linewidth = 0.1 + linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) ) } } @@ -793,7 +785,8 @@ generate_scatter_plot <- function(plot, config) { geom_abline( intercept = config$lm_line$intercept, slope = config$lm_line$slope, - color = smooth_color + color = smooth_color, + linewidth = 1 ) } } @@ -1072,7 +1065,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) ymin = paste0("0 - 2 * WT_sd_", var), ymax = paste0("0 + 2 * WT_sd_", var) ), - color = "gray" + color = "gray70", + linewidth = 0.5 ), x_breaks = unique(group_data$conc_num_factor_factor), x_labels = as.character(unique(group_data$conc_num)), @@ -1564,7 +1558,8 @@ main <- function() { )$df_with_stats message("Calculating reference strain interaction scores") - reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) + reference_results <- calculate_interaction_scores(df_reference_interaction_stats, + df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) df_reference_calculations <- reference_results$calculations df_reference_interactions <- reference_results$interactions df_reference_interactions_joined <- reference_results$full_data