From 111909914c38b11eca066189587e725b351eac95 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Mon, 2 Sep 2024 16:59:53 -0400 Subject: [PATCH] Before calculate_interaction_scores() refactor --- .../apps/r/calculate_interaction_zscores5.R | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index d2fd120e..5cfefd9f 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -5,10 +5,17 @@ suppressMessages({ library(dplyr) library(ggthemes) library(data.table) + library(unix) }) options(warn = 2, max.print = 1000) + +# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes) +soft_limit <- 30 * 1024 * 1024 * 1024 +hard_limit <- 30 * 1024 * 1024 * 1024 +rlimit_as(soft_limit, hard_limit) + # Constants for configuration plot_width <- 14 plot_height <- 9 @@ -224,23 +231,6 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q save_plots(prefix, plots, output_dir) } -# Calculate summary statistics for all variables -calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { - # Calculate summary statistics with the grouping columns - summary_stats <- df %>% - group_by(across(all_of(group_vars))) %>% - summarise(across(all_of(variables), list( - mean = ~mean(.x, na.rm = TRUE), - median = ~median(.x, na.rm = TRUE), - max = ~max(.x, na.rm = TRUE), - min = ~min(.x, na.rm = TRUE), - sd = ~sd(.x, na.rm = TRUE), - se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1) - ), .names = "{.col}_{.fn}")) - - return(summary_stats) -} - # Ensure all plots are saved and printed to PDF save_plots <- function(file_name, plot_list, output_dir) { # Save to PDF @@ -294,17 +284,34 @@ process_strains <- function(df) { return(df_strains) } +# Calculate summary statistics for all variables +calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { + # Calculate summary statistics with the grouping columns + summary_stats <- df %>% + group_by(across(all_of(group_vars))) %>% + summarise(across(all_of(variables), list( + mean = ~mean(.x, na.rm = TRUE), + median = ~median(.x, na.rm = TRUE), + max = ~max(.x, na.rm = TRUE), + min = ~min(.x, na.rm = TRUE), + sd = ~sd(.x, na.rm = TRUE), + se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1) + ), .names = "{.col}_{.fn}")) + + return(summary_stats) +} -calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { +calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { # Pull the background means print("Calculating background means") - l_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(L_mean) - k_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(K_mean) - r_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(r_mean) - auc_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(AUC_mean) + l_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_mean) + k_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_mean) + r_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(r_mean) + auc_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_mean) # Calculate all necessary statistics and shifts in one step + print("Calculating interaction scores part 1") interaction_scores_all <- df %>% group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>% summarise( @@ -344,6 +351,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c ungroup() # Calculate linear models and interaction scores + print("Calculating interaction scores part 2") interaction_scores <- interaction_scores_all %>% group_by(across(all_of(group_vars))) %>% summarise( @@ -648,9 +656,9 @@ main <- function() { # Calculate interactions variables <- c("L", "K", "r", "AUC") message("Calculating reference interaction scores") - reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables) + reference_results <- calculate_interaction_scores(stats_joined, reference_strain, max_conc, variables) message("Calculating deletion interaction scores") - deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables) + deletion_results <- calculate_interaction_scores(stats_joined, deletion_strains, max_conc, variables) zscores_calculations_reference <- reference_results$zscores_calculations zscores_interactions_reference <- reference_results$zscores_interactions