Use a single per-group N value (count of non-NA L) in calculate_summary_stats

This commit is contained in:
2024-09-05 20:33:30 -04:00
parent fa54503a5a
commit de599ed5b8

View File

@@ -183,20 +183,23 @@ process_strains <- function(df) {
# Calculate summary statistics for all variables # Calculate summary statistics for all variables
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
# Calculate summary statistics, including a single N based on L
summary_stats <- df %>% summary_stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
reframe(across(all_of(variables), list( reframe(
mean = ~mean(., na.rm = TRUE), N = sum(!is.na(L)), # Single N based on L
median = ~median(., na.rm = TRUE), across(all_of(variables), list(
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), mean = ~mean(., na.rm = TRUE),
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), median = ~median(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE), max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA) min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
# TODO: not in original stats but better to do here than in calculate_interactions? sd = ~sd(., na.rm = TRUE),
# z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA, se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
# (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score # TODO: not in original stats but better to do here than in calculate_interactions?
), .names = "{.fn}_{.col}")) %>% # z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
mutate(N = sum(!is.na(L))) # count of non-NA L values # (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
), .names = "{.fn}_{.col}")
)
# Join the summary stats back to the original dataframe # Join the summary stats back to the original dataframe
df_with_stats <- left_join(df, summary_stats, by = group_vars) df_with_stats <- left_join(df, summary_stats, by = group_vars)
@@ -205,6 +208,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats)) return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
} }
# Calculate interaction scores # Calculate interaction scores
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {