Move more lm calcs out of dplyr

This commit is contained in:
2024-09-12 21:40:22 -04:00
parent 30abced9ff
commit 54b32f47c7

View File

@@ -277,23 +277,23 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
# Create linear models with error handling for missing/insufficient data
lms <- stats %>%
summarise(
lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL,
lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NULL,
lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NULL,
lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL
lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA,
lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA,
lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA,
lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA
)
stats <- stats %>%
left_join(lms, by = group_vars) %>%
mutate(
lm_Score_L = sapply(lm_L, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_K = sapply(lm_K, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_r = sapply(lm_r, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
R_Squared_L = sapply(lm_L, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
R_Squared_K = sapply(lm_K, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
R_Squared_r = sapply(lm_r, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
@@ -302,6 +302,10 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
stats <- stats %>%
mutate(
lm_Score_L = unlist(lm_Score_L),
lm_Score_K = unlist(lm_Score_K),
lm_Score_r = unlist(lm_Score_r),
lm_Score_AUC = unlist(lm_Score_AUC),
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
@@ -693,6 +697,7 @@ main <- function() {
summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
group_vars <- c("OrfRep", "conc_num", "conc_num_factor") # default fields to group by
orf_group_vars <- c("OrfRep", "Gene", "num")
print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
@@ -1024,10 +1029,10 @@ main <- function() {
message("Calculating interaction scores")
# print("Reference strain:")
# print(head(reference_strain))
reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars)
reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars, group_vars = orf_group_vars)
# print("Deletion strains:")
# print(head(deletion_strains))
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars)
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars, group_vars = orf_group_vars)
zscores_calculations_reference <- reference_results$calculations
zscores_interactions_reference <- reference_results$interactions
@@ -1114,11 +1119,20 @@ main <- function() {
# Formerly X_NArm
zscores_interactions_filtered <- zscores_interactions %>%
group_by(across(all_of(group_vars))) %>%
group_by(across(all_of(orf_group_vars))) %>%
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
# Final filtered correlation calculations and plots
lm_results <- zscores_interactions_filtered %>%
summarise(
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA
)
zscores_interactions_filtered <- zscores_interactions_filtered %>%
left_join(lm_results, by = orf_group_vars) %>%
mutate(
Overlap = case_when(
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
@@ -1128,14 +1142,11 @@ main <- function() {
Z_lm_L >= 2 & Avg_Zscore_L <= -2 ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
TRUE ~ "No Effect"
),
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
)
) %>%
ungroup()
rank_plot_configs <- c(
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"),
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K")