From de599ed5b8b9ec1d8450ab5cf85a32b2cd303dbb Mon Sep 17 00:00:00 2001
From: Bryan Roessler
Date: Thu, 5 Sep 2024 20:33:30 -0400
Subject: [PATCH] Use a single N value

---
 .../apps/r/calculate_interaction_zscores5.R | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R
index 718815af..26d239cc 100644
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -183,20 +183,23 @@ process_strains <- function(df) {
 
 # Calculate summary statistics for all variables
 calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
+  # Calculate summary statistics, including a single N based on L
   summary_stats <- df %>%
     group_by(across(all_of(group_vars))) %>%
-    reframe(across(all_of(variables), list(
-      mean = ~mean(., na.rm = TRUE),
-      median = ~median(., na.rm = TRUE),
-      max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
-      min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
-      sd = ~sd(., na.rm = TRUE),
-      se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
-      # TODO: not in original stats but better to do here than in calculate_interactions? 
-      # z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
-      # (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
-    ), .names = "{.fn}_{.col}")) %>%
-    mutate(N = sum(!is.na(L))) # count of non-NA L values
+    reframe(
+      N = sum(!is.na(L)), # Single N based on L
+      across(all_of(variables), list(
+        mean = ~mean(., na.rm = TRUE),
+        median = ~median(., na.rm = TRUE),
+        max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
+        min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
+        sd = ~sd(., na.rm = TRUE),
+        se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
+        # TODO: not in original stats but better to do here than in calculate_interactions?
+        # z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
+        # (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
+      ), .names = "{.fn}_{.col}")
+    )
 
   # Join the summary stats back to the original dataframe
   df_with_stats <- left_join(df, summary_stats, by = group_vars)
@@ -205,6 +208,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
 
   return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
 }
 
+
 # Calculate interaction scores
 calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {