Fix column name passed to pull() (L_max -> max_L) and compute SE/z-score stats inside across()

This commit is contained in:
2024-09-04 22:38:59 -04:00
parent 58fd7453ce
commit 10660e40d0

View File

@@ -171,7 +171,7 @@ process_strains <- function(df) {
df_temp <- df %>% filter(conc_num == concentration) df_temp <- df %>% filter(conc_num == concentration)
if (concentration > 0) { if (concentration > 0) {
max_l_theoretical <- df_temp %>% pull(L_max) max_l_theoretical <- df_temp %>% pull(max_L)
df_temp <- df_temp %>% df_temp <- df_temp %>%
mutate( mutate(
@@ -191,29 +191,22 @@ process_strains <- function(df) {
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
summary_stats <- df %>% summary_stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise(across(all_of(variables), list( reframe(across(all_of(variables), list(
N = ~length(na.omit(.)), # Exclude NA values from count N = ~sum(!is.na(.)), # Count of non-NA values
mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs
median = ~median(., na.rm = TRUE), # Median ignoring NAs median = ~median(., na.rm = TRUE), # Median ignoring NAs
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Handle groups where all values are NA max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Return NA if all values are NA
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Handle groups where all values are NA min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Return NA if all values are NA
sd = ~sd(., na.rm = TRUE) # Standard deviation ignoring NAs sd = ~sd(., na.rm = TRUE), # Standard deviation ignoring NAs
), .names = "{.fn}_{.col}")) %>% se = ~ifelse(.N > 1, sd(., na.rm = TRUE) / sqrt(.N - 1), NA), # Standard Error using precomputed N
mutate( z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA, (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
se_L = ifelse(N_L > 1, sd_L / sqrt(N_L - 1), NA), # Standard error with check for division by zero ), .names = "{.fn}_{.col}"))
se_K = ifelse(N_K > 1, sd_K / sqrt(N_K - 1), NA),
se_r = ifelse(N_r > 1, sd_r / sqrt(N_r - 1), NA),
se_AUC = ifelse(N_AUC > 1, sd_AUC / sqrt(N_AUC - 1), NA),
z_max_L = ifelse(sd_L == 0, NA, (max_L - mean_L) / sd_L), # Avoid division by zero for Z-scores
z_max_K = ifelse(sd_K == 0, NA, (max_K - mean_K) / sd_K),
z_max_r = ifelse(sd_r == 0, NA, (max_r - mean_r) / sd_r),
z_max_AUC = ifelse(sd_AUC == 0, NA, (max_AUC - mean_AUC) / sd_AUC)
)
return(summary_stats) return(summary_stats)
} }
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate total concentration variables # Calculate total concentration variables
@@ -242,12 +235,12 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
SM = sum(SM) SM = sum(SM)
) %>% ) %>%
summarise(across(all_of(variables), list( summarise(across(all_of(variables), list(
mean = ~mean(.x, na.rm = TRUE), mean = ~mean(., na.rm = TRUE),
median = ~median(.x, na.rm = TRUE), median = ~median(., na.rm = TRUE),
max = ~max(.x, na.rm = TRUE), max = ~max(., na.rm = TRUE),
min = ~min(.x, na.rm = TRUE), min = ~min(., na.rm = TRUE),
sd = ~sd(.x, na.rm = TRUE), sd = ~sd(., na.rm = TRUE),
se = ~sd(.x, na.rm = TRUE) / sqrt(N - 1) # TODO why - 1? se = ~sd(., na.rm = TRUE) / sqrt(N - 1)
), .names = "{.fn}_{.col}")) %>% ), .names = "{.fn}_{.col}")) %>%
summarise( summarise(
Raw_Shift_L = mean_L[[1]] - bg_L, Raw_Shift_L = mean_L[[1]] - bg_L,