From e915e775042d42b1aed4b85b3ba4cf0292957252 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 1 Sep 2024 21:50:35 -0400 Subject: [PATCH] Refactor out calculate_bg_means --- .../apps/r/calculate_interaction_zscores5.R | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index e0cf5831..17b17ecd 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -273,16 +273,6 @@ save_plots <- function(file_name, plot_list, output_dir) { }) } -# Calculate background strain mean values -calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) { - list( - L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L), - K = df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K), - r = df_stats_by_r %>% filter(conc_num_factor == 0) %>% pull(mean_r), - AUC = df_stats_by_auc %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) - ) -} - # Process strains (deletion and reference) process_strains <- function(df, l_within_2sd_k, strain) { df_strains <- data.frame() # Initialize an empty dataframe to store results @@ -308,7 +298,13 @@ process_strains <- function(df, l_within_2sd_k, strain) { } calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc, - background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { + max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { + + # Calculate background means + L_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(L_mean) + K_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(K_mean) + r_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(r_mean) + AUC_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(AUC_mean) # Calculate all necessary statistics and shifts in one step interaction_scores_all <- df %>% @@ -318,18 +314,18 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_st across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE), NG = sum(NG, na.rm = TRUE), SM = sum(SM, na.rm = TRUE), - raw_shift_l = mean(L, na.rm = TRUE) - background_means$L, - raw_shift_k = mean(K, na.rm = TRUE) - background_means$K, - raw_shift_r = mean(r, na.rm = TRUE) - background_means$r, - raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC, + raw_shift_l = mean(L, na.rm = TRUE) - L_mean_bg, + raw_shift_k = mean(K, na.rm = TRUE) - K_mean_bg, + raw_shift_r = mean(r, na.rm = TRUE) - r_mean_bg, + raw_shift_auc = mean(AUC, na.rm = TRUE) - AUC_mean_bg, z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1], z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1], z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1], z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1], - exp_l = background_means$L + raw_shift_l, - exp_k = background_means$K + raw_shift_k, - exp_r = background_means$r + raw_shift_r, - exp_auc = background_means$AUC + raw_shift_auc, + exp_l = L_mean_bg + raw_shift_l, + exp_k = K_mean_bg + raw_shift_k, + exp_r = r_mean_bg + raw_shift_r, + exp_auc = AUC_mean_bg + raw_shift_auc, delta_l = mean(L, na.rm = TRUE) - exp_l, delta_k = mean(K, na.rm = TRUE) - exp_k, delta_r = mean(r, na.rm = TRUE) - exp_r, @@ -583,15 +579,20 @@ main <- function() { stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") - stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num_factor")) # TODO may need to add OrfRep, conc_num, etc - + # Originally this filtered L NA's + # I've removed that filtering for now since it didn't seem right but may need to add it back in later + # str(stats_by_k) + stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num", "conc_num_factor")) + + str(stats_by_k_joined) + # Filter data within 2SD - within_2sd_k <- stats_by_k_joined %>% - filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K)) + within_2sd_k <- stats_by_k %>% + filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd)) # Filter data outside 2SD - outside_2sd_k <- stats_by_k_joined %>% - filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K)) + outside_2sd_k <- stats_by_k %>% + filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd)) # Calculate summary statistics for L within and outside 2SD of K message("Calculating summary statistics for L within 2SD of K") @@ -606,8 +607,8 @@ main <- function() { row.names = FALSE) # Process background strains - background_strains <- c("YDL227C") - lapply(background_strains, function(strain) { + bg_strains <- c("YDL227C") + lapply(bg_strains, function(strain) { message("Processing background strain: ", strain) @@ -635,9 +636,6 @@ main <- function() { row.names = FALSE) stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) - message("Calculating background means") - background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg) - # Filter reference and deletion strains # Formerly X2_RF (reference strain) df_reference <- df_na %>% @@ -662,15 +660,17 @@ main <- function() { # This is synonymous with the legacy OrfRep mutation # Use group_by in functions in lieu of mutating OrfRep # default_group_vars <- c("OrfRep", "Gene", "num") + + # TODO we may need to add "num" to grouping vars # Calculate interactions variables <- c("L", "K", "r", "AUC") message("Calculating reference interaction scores") reference_results <- calculate_interaction_scores(reference_strain, stats_by_l, - stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) + stats_by_k, stats_by_r, stats_by_auc, max_conc, variables) message("Calculating deletion interaction scores") deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l, - stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables) + stats_by_k, stats_by_r, stats_by_auc, max_conc, variables) zscores_calculations_reference <- reference_results$zscores_calculations zscores_interactions_reference <- reference_results$zscores_interactions