From 40395d26e7f1a0e83188a7f0a7618388e64119f8 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Mon, 2 Sep 2024 15:36:45 -0400 Subject: [PATCH] Use a single df for process_strains() --- .../apps/r/calculate_interaction_zscores5.R | 41 +++++++------------ 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 0f17552b..2edf70c7 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -270,21 +270,16 @@ save_plots <- function(file_name, plot_list, output_dir) { } # Process strains (deletion and reference) -process_strains <- function(df, l_within_2sd_k, strain) { +process_strains <- function(df) { df_strains <- data.frame() # Initialize an empty dataframe to store results - print(names(l_within_2sd_k)) - for (concentration in unique(df$conc_num)) { + message("Processing concentration: ", concentration) df_temp <- df %>% filter(conc_num == concentration) - - if (concentration > 0) { - max_l_theoretical <- l_within_2sd_k %>% - filter(conc_num_factor == concentration) %>% - pull(L_max) - + max_l_theoretical <- df_temp %>% pull(L_max) + df_temp <- df_temp %>% mutate( L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L), # Replace zero values with max theoretical @@ -292,7 +287,8 @@ process_strains <- function(df, l_within_2sd_k, strain) { L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) # Cap L values ) } - df_strains <- bind_rows(df_strains, df_temp) # Append the results of this concentration + # Append the results of this concentration to df_strains + df_strains <- bind_rows(df_strains, df_temp) } return(df_strains) @@ -585,27 +581,20 @@ main <- function() { l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) + # Remove existing calculated summary statistics and add the new ones cols_to_remove <- names(l_within_2sd_k) cols_to_keep <- c("conc_num", "conc_num_factor") - within_2sd_k_clean <- within_2sd_k %>% select(-all_of(setdiff(cols_to_remove, cols_to_keep))) - l_within_2sd_k_joined <- within_2sd_k_clean %>% left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor")) - - - - #l_within_2sd_k_joined <- merge(within_2sd_k, l_within_2sd_k, by = c("conc_num", "conc_num_factor"), all.x = TRUE) - print("within_2sd_k") - print(head(within_2sd_k)) - print("l_within_2sd_k") - print(head(l_within_2sd_k)) - print("l_within_2sd_k_joined") - print(head(l_within_2sd_k_joined)) - - quit() + # print("within_2sd_k") + # print(head(within_2sd_k)) + # print("l_within_2sd_k") + # print(head(l_within_2sd_k)) + # print("l_within_2sd_k_joined") + # print(head(l_within_2sd_k_joined)) write.csv(l_within_2sd_k, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), @@ -655,9 +644,9 @@ main <- function() { mutate(SM = 0) message("Processing reference strain") - reference_strain <- process_strains(df_reference, l_within_2sd_k_joined, strain) + reference_strain <- process_strains(l_within_2sd_k_joined) message("Processing deletion strains") - deletion_strains <- process_strains(df_deletion, l_within_2sd_k_joined, strain) + deletion_strains <- process_strains(l_within_2sd_k_joined) # TODO we may need to add "num" to grouping vars