From f207e40efd84d90f984e816ec47360dcded0c897 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sat, 5 Oct 2024 06:00:25 -0400 Subject: [PATCH] Improve WT groupings --- .../apps/r/calculate_interaction_zscores.R | 49 ++++++------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 4d93541a..97251756 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol # Calculate WT statistics from df_bg wt_stats <- df_bg %>% - filter(conc_num == 0) %>% + group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>% summarise( WT_L = mean(mean_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE), @@ -219,21 +219,13 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol WT_r = mean(mean_r, na.rm = TRUE), WT_sd_r = mean(sd_r, na.rm = TRUE), WT_AUC = mean(mean_AUC, na.rm = TRUE), - WT_sd_AUC = mean(sd_AUC, na.rm = TRUE) + WT_sd_AUC = mean(sd_AUC, na.rm = TRUE), + .groups = "drop" ) - # Add WT statistics to df + # Join WT stats back to df df <- df %>% - mutate( - WT_L = wt_stats$WT_L, - WT_sd_L = wt_stats$WT_sd_L, - WT_K = wt_stats$WT_K, - WT_sd_K = wt_stats$WT_sd_K, - WT_r = wt_stats$WT_r, - WT_sd_r = wt_stats$WT_sd_r, - WT_AUC = wt_stats$WT_AUC, - WT_sd_AUC = wt_stats$WT_sd_AUC - ) + left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor")) # Compute mean values at zero concentration mean_L_zero_df <- df %>% @@ -804,16 +796,6 @@ generate_scatter_plot <- function(plot, config) { color = smooth_color ) } - - # For now I want to try and hardcode it - # else { - # plot <- plot + - # geom_smooth( - # method = "lm", - # se = FALSE, - # color = smooth_color - # ) - # } } # Add SD Bands if specified @@ -1077,8 +1059,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type) title_size = rel(1.3), coord_cartesian = y_limits, annotations = list( - list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))), - list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste(" lm ZScore =", round(Z_lm_value, 2))), + list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))), + list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste(" lm ZScore =", round(Z_lm_value, 2))), # list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste(" R-squared =", round(R_squared_value, 2))), list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)), list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)), @@ -1582,11 +1564,10 @@ main <- function() { )$df_with_stats message("Calculating reference strain interaction scores") - results <- calculate_interaction_scores(df_reference_interaction_stats, - df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) - df_reference_calculations <- results$calculations - df_reference_interactions <- results$interactions - df_reference_interactions_joined <- results$full_data + reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) + df_reference_calculations <- reference_results$calculations + df_reference_interactions <- reference_results$interactions + df_reference_interactions_joined <- reference_results$full_data write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) @@ -1614,10 +1595,10 @@ main <- function() { )$df_with_stats message("Calculating deletion strain(s) interactions scores") - results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) - df_calculations <- results$calculations - df_interactions <- results$interactions - df_interactions_joined <- results$full_data + deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) + df_calculations <- deletion_results$calculations + df_interactions <- deletion_results$interactions + df_interactions_joined <- deletion_results$full_data write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)