# Patch for workflow/apps/r/calculate_interaction_zscores5.R
#
# Hunk 1 (load_and_process_data): the first-column filter indexes the piped
#   data frame with `.[[1]]` instead of `.data[[1]]`.
# Hunk 2 (calculate_summary_stats): every entry of the across() function list
#   becomes a `~` lambda using `.`; the se / z_max entries are removed from the
#   list and recomputed in a following mutate() from the generated
#   `{.fn}_{.col}` columns.
#
# NOTE(review): the line structure of this patch was rebuilt from a copy whose
#   newlines and indentation had been collapsed. Indentation and the position
#   of blank context lines are reconstructed to satisfy the hunk line counts --
#   verify against the target file (or apply with whitespace tolerance).
# NOTE(review): the added mutate() hard-codes the L, K, r and AUC columns even
#   though calculate_summary_stats() takes `variables` as a parameter; confirm
#   callers always pass exactly those four.
# NOTE(review): N = ~length(.) counts NA entries while sd uses na.rm = TRUE,
#   so se is computed from an N that may include missing values -- confirm
#   this is intended.
diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R
index c688aab8..25bea49e 100644
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -114,7 +114,7 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
 
   # Clean and convert columns to numeric where appropriate
   df <- df %>%
-    filter(!(.data[[1]] %in% c("", "Scan"))) %>%
+    filter(!(.[[1]] %in% c("", "Scan"))) %>%
     filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
     mutate(
       Col = as.numeric(Col),
@@ -189,23 +189,31 @@ process_strains <- function(df) {
 
 # Calculate summary statistics for all variables
 calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
-  # Calculate summary statistics with the grouping columns
   summary_stats <- df %>%
     group_by(across(all_of(group_vars))) %>%
     summarise(across(all_of(variables), list(
-      N = length(.x),
-      mean = ~mean(.x, na.rm = TRUE),
-      median = ~median(.x, na.rm = TRUE),
-      max = ~max(.x, na.rm = TRUE),
-      min = ~min(.x, na.rm = TRUE),
-      sd = ~sd(.x, na.rm = TRUE),
-      se = sd / sqrt(N - 1),
-      z_max = (max - mean) / sd
-    ), .names = "{.fn}_{.col}"))
+      N = ~length(.),
+      mean = ~mean(., na.rm = TRUE),
+      median = ~median(., na.rm = TRUE),
+      max = ~max(., na.rm = TRUE),
+      min = ~min(., na.rm = TRUE),
+      sd = ~sd(., na.rm = TRUE)
+    ), .names = "{.fn}_{.col}")) %>%
+    mutate(
+      se_L = sd_L / sqrt(N_L - 1),
+      se_K = sd_K / sqrt(N_K - 1),
+      se_r = sd_r / sqrt(N_r - 1),
+      se_AUC = sd_AUC / sqrt(N_AUC - 1),
+      z_max_L = (max_L - mean_L) / sd_L,
+      z_max_K = (max_K - mean_K) / sd_K,
+      z_max_r = (max_r - mean_r) / sd_r,
+      z_max_AUC = (max_AUC - mean_AUC) / sd_AUC
+    )
 
   return(summary_stats)
 }
 
+
 calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
 
   # Calculate total concentration variables