Interactions refactor to improve lm object handling

This commit is contained in:
2024-09-12 19:51:15 -04:00
parent 270cc8def5
commit fa7544cb0e

View File

@@ -204,64 +204,59 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first() AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
) )
# Main statistics and shifts
stats <- df %>% stats <- df %>%
mutate( mutate(
WT_L = df$mean_L, WT_L = mean_L,
WT_K = df$mean_K, WT_K = mean_K,
WT_r = df$mean_r, WT_r = mean_r,
WT_AUC = df$mean_AUC, WT_AUC = mean_AUC,
WT_sd_L = df$sd_L, WT_sd_L = sd_L,
WT_sd_K = df$sd_K, WT_sd_K = sd_K,
WT_sd_r = df$sd_r, WT_sd_r = sd_r,
WT_sd_AUC = df$sd_AUC WT_sd_AUC = sd_AUC
) %>% ) %>%
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>% group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
mutate( mutate(
N = sum(!is.na(L)), N = sum(!is.na(L)),
NG = sum(NG, na.rm = TRUE), NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE), DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE), SM = sum(SM, na.rm = TRUE),
across(all_of(variables), list( across(all_of(variables), list(
mean = ~mean(., na.rm = TRUE), mean = ~mean(., na.rm = TRUE),
median = ~median(., na.rm = TRUE), median = ~median(., na.rm = TRUE),
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
sd = ~sd(., na.rm = TRUE), sd = ~sd(., na.rm = TRUE),
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA) se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
), .names = "{.fn}_{.col}") ), .names = "{.fn}_{.col}")
) %>% ) %>%
ungroup() ungroup()
stats <- stats %>% stats <- stats %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
mutate( mutate(
Raw_Shift_L = mean_L[[1]] - bg_means$L, Raw_Shift_L = mean_L - bg_means$L,
Raw_Shift_K = mean_K[[1]] - bg_means$K, Raw_Shift_K = mean_K - bg_means$K,
Raw_Shift_r = mean_r[[1]] - bg_means$r, Raw_Shift_r = mean_r - bg_means$r,
Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, Raw_Shift_AUC = mean_AUC - bg_means$AUC,
Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L, Z_Shift_L = Raw_Shift_L / bg_sd$L,
Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K, Z_Shift_K = Raw_Shift_K / bg_sd$K,
Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r, Z_Shift_r = Raw_Shift_r / bg_sd$r,
Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC Z_Shift_AUC = Raw_Shift_AUC / bg_sd$AUC
) )
stats <- stats %>% stats <- stats %>%
mutate( mutate(
Exp_L = WT_L + Raw_Shift_L, Exp_L = WT_L + Raw_Shift_L,
Exp_K = WT_K + Raw_Shift_K, Exp_K = WT_K + Raw_Shift_K,
Exp_r = WT_r + Raw_Shift_r, Exp_r = WT_r + Raw_Shift_r,
Exp_AUC = WT_AUC + Raw_Shift_AUC Exp_AUC = WT_AUC + Raw_Shift_AUC,
)
stats <- stats %>%
mutate(
Delta_L = mean_L - Exp_L, Delta_L = mean_L - Exp_L,
Delta_K = mean_K - Exp_K, Delta_K = mean_K - Exp_K,
Delta_r = mean_r - Exp_r, Delta_r = mean_r - Exp_r,
Delta_AUC = mean_AUC - Exp_AUC Delta_AUC = mean_AUC - Exp_AUC
) ) %>%
stats <- stats %>%
mutate( mutate(
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
@@ -270,39 +265,34 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
) )
stats <- stats %>% # Create linear models with proper error handling for insufficient data
mutate(
Zscore_L = Delta_L / WT_sd_L,
Zscore_K = Delta_K / WT_sd_K,
Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC
)
lms <- stats %>% lms <- stats %>%
summarise( summarise(
lm_L = list(lm(Delta_L ~ conc_num_factor)), lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL,
lm_K = list(lm(Delta_K ~ conc_num_factor)), lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NULL,
lm_r = list(lm(Delta_r ~ conc_num_factor)), lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NULL,
lm_AUC = list(lm(Delta_AUC ~ conc_num_factor)) lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL
) )
# Join models and calculate interaction scores
stats <- stats %>% stats <- stats %>%
left_join(lms, by = group_vars) %>% left_join(lms, by = group_vars) %>%
mutate( mutate(
lm_Score_L = sapply(lm_L, function(model) coef(model)[2] * max_conc + coef(model)[1]), lm_Score_L = sapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_K = sapply(lm_K, function(model) coef(model)[2] * max_conc + coef(model)[1]), lm_Score_K = sapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_r = sapply(lm_r, function(model) coef(model)[2] * max_conc + coef(model)[1]), lm_Score_r = sapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_AUC = sapply(lm_AUC, function(model) coef(model)[2] * max_conc + coef(model)[1]), lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
R_Squared_L = sapply(lm_L, function(model) summary(model)$r.squared), R_Squared_L = sapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_K = sapply(lm_K, function(model) summary(model)$r.squared), R_Squared_K = sapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_r = sapply(lm_r, function(model) summary(model)$r.squared), R_Squared_r = sapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_AUC = sapply(lm_AUC, function(model) summary(model)$r.squared), R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
) )
# Calculate Z-scores
stats <- stats %>% stats <- stats %>%
mutate( mutate(
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
@@ -315,9 +305,10 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
) )
# Declare column order for output # Final output preparation
calculations <- stats %>% calculations <- stats %>%
select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", select(
"OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
"mean_L", "mean_K", "mean_r", "mean_AUC", "mean_L", "mean_K", "mean_r", "mean_AUC",
"median_L", "median_K", "median_r", "median_AUC", "median_L", "median_K", "median_r", "median_AUC",
"sd_L", "sd_K", "sd_r", "sd_AUC", "sd_L", "sd_K", "sd_r", "sd_AUC",
@@ -332,10 +323,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
"NG", "SM", "DB") %>% "NG", "SM", "DB") %>%
ungroup() ungroup()
# Also arrange results by Z_lm_L and NG
interactions <- stats %>% interactions <- stats %>%
select("OrfRep", "Gene", "num", select(
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
@@ -350,6 +340,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
return(list(calculations = calculations, interactions = interactions)) return(list(calculations = calculations, interactions = interactions))
} }
generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
message("Generating html and pdf plots for: ", file_name, ".pdf|html") message("Generating html and pdf plots for: ", file_name, ".pdf|html")