Make calculate_summary_stats() more explicit

This commit is contained in:
2024-09-04 21:39:47 -04:00
parent 93c3b82140
commit 464fef2cfc

View File

@@ -114,7 +114,7 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
# Clean and convert columns to numeric where appropriate
df <- df %>%
filter(!(.data[[1]] %in% c("", "Scan"))) %>%
filter(!(.[[1]] %in% c("", "Scan"))) %>%
filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
mutate(
Col = as.numeric(Col),
@@ -189,23 +189,31 @@ process_strains <- function(df) {
# Calculate summary statistics for all variables
#
# Groups `df` by `group_vars` and, for every column named in `variables`,
# produces N_<var>, mean_<var>, median_<var>, max_<var>, min_<var> and
# sd_<var>, followed by the derived columns se_<var> and z_max_<var>.
#
# Args:
#   df:         data frame holding the grouping and measurement columns.
#   variables:  character vector of column names to summarise.
#   group_vars: character vector of column names to group by.
#
# Returns:
#   One row per group containing the grouping columns and the summary
#   columns described above.
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
  summary_stats <- df %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      across(all_of(variables), list(
        # N is the raw group size: unlike the statistics below, it counts NAs
        N = ~length(.),
        mean = ~mean(., na.rm = TRUE),
        median = ~median(., na.rm = TRUE),
        max = ~max(., na.rm = TRUE),
        min = ~min(., na.rm = TRUE),
        sd = ~sd(., na.rm = TRUE)
      ), .names = "{.fn}_{.col}"),
      # Same resulting grouping as summarise()'s default, stated explicitly
      .groups = "drop_last"
    )

  # Derive se_* and z_max_* for whichever variables were requested rather than
  # hard-coding L, K, r and AUC. All se_* columns are added before the z_max_*
  # columns so the column order is unchanged for those four variables.
  for (var in variables) {
    sd_values <- summary_stats[[paste0("sd_", var)]]
    n_values <- summary_stats[[paste0("N_", var)]]
    # NOTE(review): the divisor is sqrt(N - 1), kept exactly as in the original
    # formula -- confirm this is intended rather than sqrt(N).
    summary_stats[[paste0("se_", var)]] <- sd_values / sqrt(n_values - 1)
  }

  for (var in variables) {
    max_values <- summary_stats[[paste0("max_", var)]]
    mean_values <- summary_stats[[paste0("mean_", var)]]
    sd_values <- summary_stats[[paste0("sd_", var)]]
    summary_stats[[paste0("z_max_", var)]] <- (max_values - mean_values) / sd_values
  }

  summary_stats
}
calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate total concentration variables