Switch to map2() to parse lm coefficients

This commit is contained in:
2024-10-07 06:07:45 -04:00
parent 52381426b2
commit d978407b06

View File

@@ -217,22 +217,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
group_vars <- c("OrfRep", "Gene", "Drug")
}
# This is a helper function to perform a linear regression and extract coefficients
perform_lm <- function(data, response, predictor, max_conc) {
valid_data <- data %>% filter(!is.na(data[[response]]))
if (nrow(valid_data) > 1) {
model <- lm(data[[response]] ~ data[[predictor]], data = valid_data)
list(
intercept = coef(model)[1],
slope = coef(model)[2],
r_squared = summary(model)$r.squared,
score = max_conc * coef(model)[2] + coef(model)[1]
)
} else {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
}
}
# Calculate WT statistics from df_bg
wt_stats <- df_bg %>%
group_by(across(all_of(bg_group_vars))) %>%
@@ -312,37 +296,82 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC
) %>%
ungroup() %>% # Ungroup before group_modify
ungroup()
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>%
group_modify(~ {
lm_L <- perform_lm(.x, "Delta_L", "conc_num_factor", max_conc)
lm_K <- perform_lm(.x, "Delta_K", "conc_num_factor", max_conc)
lm_r <- perform_lm(.x, "Delta_r", "conc_num_factor", max_conc)
lm_AUC <- perform_lm(.x, "Delta_AUC", "conc_num_factor", max_conc)
mutate(
lm_L = map2(.x = Delta_L, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_K = map2(.x = Delta_K, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_r = map2(.x = Delta_r, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_AUC = map2(.x = Delta_AUC, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
.x %>%
mutate(
lm_intercept_L = lm_L$intercept,
lm_slope_L = lm_L$slope,
R_Squared_L = lm_L$r_squared,
lm_Score_L = lm_L$score,
# Extract coefficients and statistics for each model
lm_intercept_L = map_dbl(lm_L, "intercept"),
lm_slope_L = map_dbl(lm_L, "slope"),
R_Squared_L = map_dbl(lm_L, "r_squared"),
lm_Score_L = map_dbl(lm_L, "score"),
lm_intercept_K = lm_K$intercept,
lm_slope_K = lm_K$slope,
R_Squared_K = lm_K$r_squared,
lm_Score_K = lm_K$score,
lm_intercept_K = map_dbl(lm_K, "intercept"),
lm_slope_K = map_dbl(lm_K, "slope"),
R_Squared_K = map_dbl(lm_K, "r_squared"),
lm_Score_K = map_dbl(lm_K, "score"),
lm_intercept_r = lm_r$intercept,
lm_slope_r = lm_r$slope,
R_Squared_r = lm_r$r_squared,
lm_Score_r = lm_r$score,
lm_intercept_r = map_dbl(lm_r, "intercept"),
lm_slope_r = map_dbl(lm_r, "slope"),
R_Squared_r = map_dbl(lm_r, "r_squared"),
lm_Score_r = map_dbl(lm_r, "score"),
lm_intercept_AUC = lm_AUC$intercept,
lm_slope_AUC = lm_AUC$slope,
R_Squared_AUC = lm_AUC$r_squared,
lm_Score_AUC = lm_AUC$score
)
}) %>%
lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
lm_slope_AUC = map_dbl(lm_AUC, "slope"),
R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
lm_Score_AUC = map_dbl(lm_AUC, "score")
) %>%
select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
ungroup()
# For interaction plot error bars
@@ -480,77 +509,183 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
TRUE ~ "No Effect"
),
# For rank plots
lm_L = perform_lm(., "Z_lm_L", "Avg_Zscore_L", max_conc),
lm_K = perform_lm(., "Z_lm_K", "Avg_Zscore_K", max_conc),
lm_r = perform_lm(., "Z_lm_r", "Avg_Zscore_r", max_conc),
lm_AUC = perform_lm(., "Z_lm_AUC", "Avg_Zscore_AUC", max_conc),
lm_L = map2(.x = Z_lm_L, .y = Avg_Zscore_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_K = map2(.x = Z_lm_K, .y = Avg_Zscore_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_r = map2(.x = Z_lm_r, .y = Avg_Zscore_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_AUC = map2(.x = Z_lm_AUC, .y = Avg_Zscore_AUC, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
# For correlation plots
Z_lm_K_L = perform_lm(., "Z_lm_K", "Z_lm_L", max_conc),
Z_lm_r_L = perform_lm(., "Z_lm_r", "Z_lm_L", max_conc),
Z_lm_R_AUC_L = perform_lm(., "Z_lm_AUC", "Z_lm_L", max_conc),
Z_lm_R_r_K = perform_lm(., "Z_lm_r", "Z_lm_K", max_conc),
Z_lm_R_AUC_K = perform_lm(., "Z_lm_AUC", "Z_lm_K", max_conc),
Z_lm_R_AUC_r = perform_lm(., "Z_lm_AUC", "Z_lm_r", max_conc),
Z_lm_K_L = map2(.x = Z_lm_K, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_r_L = map2(.x = Z_lm_r, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_L = map2(.x = Z_lm_AUC, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_r_K = map2(.x = Z_lm_r, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_K = map2(.x = Z_lm_AUC, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_r = map2(.x = Z_lm_AUC, .y = Z_lm_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
# Extract coefficients and statistics for each model
lm_rank_intercept_L = lm_L$intercept,
lm_rank_slope_L = lm_L$slope,
R_Squared_L = lm_L$r_squared,
lm_Score_L = lm_L$score,
lm_rank_intercept_L = map_dbl(lm_L, "intercept"),
lm_rank_slope_L = map_dbl(lm_L, "slope"),
R_Squared_L = map_dbl(lm_L, "r_squared"),
lm_Score_L = map_dbl(lm_L, "score"),
lm_intercept_K = lm_K$intercept,
lm_slope_K = lm_K$slope,
R_Squared_K = lm_K$r_squared,
lm_Score_K = lm_K$score,
lm_intercept_K = map_dbl(lm_K, "intercept"),
lm_slope_K = map_dbl(lm_K, "slope"),
R_Squared_K = map_dbl(lm_K, "r_squared"),
lm_Score_K = map_dbl(lm_K, "score"),
lm_intercept_r = lm_r$intercept,
lm_slope_r = lm_r$slope,
R_Squared_r = lm_r$r_squared,
lm_Score_r = lm_r$score,
lm_intercept_r = map_dbl(lm_r, "intercept"),
lm_slope_r = map_dbl(lm_r, "slope"),
R_Squared_r = map_dbl(lm_r, "r_squared"),
lm_Score_r = map_dbl(lm_r, "score"),
lm_intercept_AUC = lm_AUC$intercept,
lm_slope_AUC = lm_AUC$slope,
R_Squared_AUC = lm_AUC$r_squared,
lm_Score_AUC = lm_AUC$score,
lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
lm_slope_AUC = map_dbl(lm_AUC, "slope"),
R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
lm_Score_AUC = map_dbl(lm_AUC, "score"),
Z_lm_intercept_K_L = Z_lm_K_L$intercept,
Z_lm_slope_K_L = Z_lm_K_L$slope,
Z_lm_R_squared_K_L = Z_lm_K_L$r_squared,
Z_lm_Score_K_L = Z_lm_K_L$score,
Z_lm_intercept_K_L = map_dbl(Z_lm_K_L, "intercept"),
Z_lm_slope_K_L = map_dbl(Z_lm_K_L, "slope"),
Z_lm_R_squared_K_L = map_dbl(Z_lm_K_L, "r_squared"),
Z_lm_Score_K_L = map_dbl(Z_lm_K_L, "score"),
Z_lm_intercept_r_L = Z_lm_r_L$intercept,
Z_lm_slope_r_L = Z_lm_r_L$slope,
Z_lm_R_squared_r_L = Z_lm_r_L$r_squared,
Z_lm_Score_r_L = Z_lm_r_L$score,
Z_lm_intercept_r_L = map_dbl(Z_lm_r_L, "intercept"),
Z_lm_slope_r_L = map_dbl(Z_lm_r_L, "slope"),
Z_lm_R_squared_r_L = map_dbl(Z_lm_r_L, "r_squared"),
Z_lm_Score_r_L = map_dbl(Z_lm_r_L, "score"),
Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L$intercept,
Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L$slope,
Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L$r_squared,
Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L$score,
Z_lm_intercept_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "intercept"),
Z_lm_slope_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "slope"),
Z_lm_R_squared_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "r_squared"),
Z_lm_Score_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "score"),
Z_lm_intercept_R_r_K = Z_lm_R_r_K$intercept,
Z_lm_slope_R_r_K = Z_lm_R_r_K$slope,
Z_lm_R_squared_R_r_K = Z_lm_R_r_K$r_squared,
Z_lm_Score_R_r_K = Z_lm_R_r_K$score,
Z_lm_intercept_R_r_K = map_dbl(Z_lm_R_r_K, "intercept"),
Z_lm_slope_R_r_K = map_dbl(Z_lm_R_r_K, "slope"),
Z_lm_R_squared_R_r_K = map_dbl(Z_lm_R_r_K, "r_squared"),
Z_lm_Score_R_r_K = map_dbl(Z_lm_R_r_K, "score"),
Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K$intercept,
Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K$slope,
Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K$r_squared,
Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K$score,
Z_lm_intercept_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "intercept"),
Z_lm_slope_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "slope"),
Z_lm_R_squared_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "r_squared"),
Z_lm_Score_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "score"),
Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r$intercept,
Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r$slope,
Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r$r_squared,
Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r$score
Z_lm_intercept_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "intercept"),
Z_lm_slope_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "slope"),
Z_lm_R_squared_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "r_squared"),
Z_lm_Score_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "score")
) %>%
# Drop the lm objects and keep only the lm results
select(
-lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,
-Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r
)
-Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r)
} # end deletion-specific block
# Create the final calculations and interactions dataframes with only required columns for csv output