From 10660e40d03f9e1f3c57b1747977ad6243f41381 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 4 Sep 2024 22:38:59 -0400 Subject: [PATCH] Fix pull var --- .../apps/r/calculate_interaction_zscores5.R | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 6328f7a8..e4efb574 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -171,7 +171,7 @@ process_strains <- function(df) { df_temp <- df %>% filter(conc_num == concentration) if (concentration > 0) { - max_l_theoretical <- df_temp %>% pull(L_max) + max_l_theoretical <- df_temp %>% pull(max_L) df_temp <- df_temp %>% mutate( @@ -191,29 +191,22 @@ process_strains <- function(df) { calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { summary_stats <- df %>% group_by(across(all_of(group_vars))) %>% - summarise(across(all_of(variables), list( - N = ~length(na.omit(.)), # Exclude NA values from count + reframe(across(all_of(variables), list( + N = ~sum(!is.na(.)), # Count of non-NA values mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs median = ~median(., na.rm = TRUE), # Median ignoring NAs - max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Handle groups where all values are NA - min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Handle groups where all values are NA - sd = ~sd(., na.rm = TRUE) # Standard deviation ignoring NAs - ), .names = "{.fn}_{.col}")) %>% - mutate( - se_L = ifelse(N_L > 1, sd_L / sqrt(N_L - 1), NA), # Standard error with check for division by zero - se_K = ifelse(N_K > 1, sd_K / sqrt(N_K - 1), NA), - se_r = ifelse(N_r > 1, sd_r / sqrt(N_r - 1), NA), - se_AUC = ifelse(N_AUC > 1, sd_AUC / sqrt(N_AUC - 1), NA), - z_max_L = ifelse(sd_L == 0, NA, (max_L - mean_L) / sd_L), # Avoid division by zero for Z-scores - z_max_K = ifelse(sd_K == 0, NA, (max_K - mean_K) / sd_K), - z_max_r = ifelse(sd_r == 0, NA, (max_r - mean_r) / sd_r), - z_max_AUC = ifelse(sd_AUC == 0, NA, (max_AUC - mean_AUC) / sd_AUC) - ) - + max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Return NA if all values are NA + min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Return NA if all values are NA + sd = ~sd(., na.rm = TRUE), # Standard deviation ignoring NAs + se = ~ifelse(.N > 1, sd(., na.rm = TRUE) / sqrt(.N - 1), NA), # Standard Error using precomputed N + z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA, (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score + ), .names = "{.fn}_{.col}")) + return(summary_stats) } + calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables @@ -242,12 +235,12 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c SM = sum(SM) ) %>% summarise(across(all_of(variables), list( - mean = ~mean(.x, na.rm = TRUE), - median = ~median(.x, na.rm = TRUE), - max = ~max(.x, na.rm = TRUE), - min = ~min(.x, na.rm = TRUE), - sd = ~sd(.x, na.rm = TRUE), - se = ~sd(.x, na.rm = TRUE) / sqrt(N - 1) # TODO why - 1? + mean = ~mean(., na.rm = TRUE), + median = ~median(., na.rm = TRUE), + max = ~max(., na.rm = TRUE), + min = ~min(., na.rm = TRUE), + sd = ~sd(., na.rm = TRUE), + se = ~sd(., na.rm = TRUE) / sqrt(N - 1) ), .names = "{.fn}_{.col}")) %>% summarise( Raw_Shift_L = mean_L[[1]] - bg_L,