Switch to map2() to parse lm coefficients

This commit is contained in:
2024-10-07 06:07:45 -04:00
parent 52381426b2
commit d978407b06

View File

@@ -217,22 +217,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
group_vars <- c("OrfRep", "Gene", "Drug") group_vars <- c("OrfRep", "Gene", "Drug")
} }
# This is a helper function to perform a linear regression and extract coefficients
perform_lm <- function(data, response, predictor, max_conc) {
valid_data <- data %>% filter(!is.na(data[[response]]))
if (nrow(valid_data) > 1) {
model <- lm(data[[response]] ~ data[[predictor]], data = valid_data)
list(
intercept = coef(model)[1],
slope = coef(model)[2],
r_squared = summary(model)$r.squared,
score = max_conc * coef(model)[2] + coef(model)[1]
)
} else {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
}
}
# Calculate WT statistics from df_bg # Calculate WT statistics from df_bg
wt_stats <- df_bg %>% wt_stats <- df_bg %>%
group_by(across(all_of(bg_group_vars))) %>% group_by(across(all_of(bg_group_vars))) %>%
@@ -312,37 +296,82 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Zscore_r = Delta_r / WT_sd_r, Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC Zscore_AUC = Delta_AUC / WT_sd_AUC
) %>% ) %>%
ungroup() %>% # Ungroup before group_modify ungroup()
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
group_modify(~ { mutate(
lm_L <- perform_lm(.x, "Delta_L", "conc_num_factor", max_conc) lm_L = map2(.x = Delta_L, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
lm_K <- perform_lm(.x, "Delta_K", "conc_num_factor", max_conc) list(intercept = NA, slope = NA, r_squared = NA, score = NA)
lm_r <- perform_lm(.x, "Delta_r", "conc_num_factor", max_conc) } else {
lm_AUC <- perform_lm(.x, "Delta_AUC", "conc_num_factor", max_conc) lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_K = map2(.x = Delta_K, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_r = map2(.x = Delta_r, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_AUC = map2(.x = Delta_AUC, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.x ~ .y) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
.x %>% # Extract coefficients and statistics for each model
mutate( lm_intercept_L = map_dbl(lm_L, "intercept"),
lm_intercept_L = lm_L$intercept, lm_slope_L = map_dbl(lm_L, "slope"),
lm_slope_L = lm_L$slope, R_Squared_L = map_dbl(lm_L, "r_squared"),
R_Squared_L = lm_L$r_squared, lm_Score_L = map_dbl(lm_L, "score"),
lm_Score_L = lm_L$score,
lm_intercept_K = lm_K$intercept, lm_intercept_K = map_dbl(lm_K, "intercept"),
lm_slope_K = lm_K$slope, lm_slope_K = map_dbl(lm_K, "slope"),
R_Squared_K = lm_K$r_squared, R_Squared_K = map_dbl(lm_K, "r_squared"),
lm_Score_K = lm_K$score, lm_Score_K = map_dbl(lm_K, "score"),
lm_intercept_r = lm_r$intercept, lm_intercept_r = map_dbl(lm_r, "intercept"),
lm_slope_r = lm_r$slope, lm_slope_r = map_dbl(lm_r, "slope"),
R_Squared_r = lm_r$r_squared, R_Squared_r = map_dbl(lm_r, "r_squared"),
lm_Score_r = lm_r$score, lm_Score_r = map_dbl(lm_r, "score"),
lm_intercept_AUC = lm_AUC$intercept, lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
lm_slope_AUC = lm_AUC$slope, lm_slope_AUC = map_dbl(lm_AUC, "slope"),
R_Squared_AUC = lm_AUC$r_squared, R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
lm_Score_AUC = lm_AUC$score lm_Score_AUC = map_dbl(lm_AUC, "score")
) ) %>%
}) %>% select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
ungroup() ungroup()
# For interaction plot error bars # For interaction plot error bars
@@ -480,77 +509,183 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore", Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
TRUE ~ "No Effect" TRUE ~ "No Effect"
), ),
lm_L = map2(.x = Z_lm_L, .y = Avg_Zscore_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
# For rank plots list(intercept = NA, slope = NA, r_squared = NA, score = NA)
lm_L = perform_lm(., "Z_lm_L", "Avg_Zscore_L", max_conc), } else {
lm_K = perform_lm(., "Z_lm_K", "Avg_Zscore_K", max_conc), lm(.y ~ .x) %>% {
lm_r = perform_lm(., "Z_lm_r", "Avg_Zscore_r", max_conc), list(
lm_AUC = perform_lm(., "Z_lm_AUC", "Avg_Zscore_AUC", max_conc), intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_K = map2(.x = Z_lm_K, .y = Avg_Zscore_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_r = map2(.x = Z_lm_r, .y = Avg_Zscore_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
lm_AUC = map2(.x = Z_lm_AUC, .y = Avg_Zscore_AUC, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
# For correlation plots # For correlation plots
Z_lm_K_L = perform_lm(., "Z_lm_K", "Z_lm_L", max_conc), Z_lm_K_L = map2(.x = Z_lm_K, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
Z_lm_r_L = perform_lm(., "Z_lm_r", "Z_lm_L", max_conc), list(intercept = NA, slope = NA, r_squared = NA, score = NA)
Z_lm_R_AUC_L = perform_lm(., "Z_lm_AUC", "Z_lm_L", max_conc), } else {
Z_lm_R_r_K = perform_lm(., "Z_lm_r", "Z_lm_K", max_conc), lm(.y ~ .x) %>% {
Z_lm_R_AUC_K = perform_lm(., "Z_lm_AUC", "Z_lm_K", max_conc), list(
Z_lm_R_AUC_r = perform_lm(., "Z_lm_AUC", "Z_lm_r", max_conc), intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_r_L = map2(.x = Z_lm_r, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_L = map2(.x = Z_lm_AUC, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_r_K = map2(.x = Z_lm_r, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_K = map2(.x = Z_lm_AUC, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
Z_lm_R_AUC_r = map2(.x = Z_lm_AUC, .y = Z_lm_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
} else {
lm(.y ~ .x) %>% {
list(
intercept = coef(.)[1],
slope = coef(.)[2],
r_squared = summary(.)$r.squared,
score = max_conc * coef(.)[2] + coef(.)[1]
)
}
}),
# Extract coefficients and statistics for each model # Extract coefficients and statistics for each model
lm_rank_intercept_L = lm_L$intercept, lm_rank_intercept_L = map_dbl(lm_L, "intercept"),
lm_rank_slope_L = lm_L$slope, lm_rank_slope_L = map_dbl(lm_L, "slope"),
R_Squared_L = lm_L$r_squared, R_Squared_L = map_dbl(lm_L, "r_squared"),
lm_Score_L = lm_L$score, lm_Score_L = map_dbl(lm_L, "score"),
lm_intercept_K = lm_K$intercept, lm_intercept_K = map_dbl(lm_K, "intercept"),
lm_slope_K = lm_K$slope, lm_slope_K = map_dbl(lm_K, "slope"),
R_Squared_K = lm_K$r_squared, R_Squared_K = map_dbl(lm_K, "r_squared"),
lm_Score_K = lm_K$score, lm_Score_K = map_dbl(lm_K, "score"),
lm_intercept_r = lm_r$intercept, lm_intercept_r = map_dbl(lm_r, "intercept"),
lm_slope_r = lm_r$slope, lm_slope_r = map_dbl(lm_r, "slope"),
R_Squared_r = lm_r$r_squared, R_Squared_r = map_dbl(lm_r, "r_squared"),
lm_Score_r = lm_r$score, lm_Score_r = map_dbl(lm_r, "score"),
lm_intercept_AUC = lm_AUC$intercept, lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
lm_slope_AUC = lm_AUC$slope, lm_slope_AUC = map_dbl(lm_AUC, "slope"),
R_Squared_AUC = lm_AUC$r_squared, R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
lm_Score_AUC = lm_AUC$score, lm_Score_AUC = map_dbl(lm_AUC, "score"),
Z_lm_intercept_K_L = Z_lm_K_L$intercept, Z_lm_intercept_K_L = map_dbl(Z_lm_K_L, "intercept"),
Z_lm_slope_K_L = Z_lm_K_L$slope, Z_lm_slope_K_L = map_dbl(Z_lm_K_L, "slope"),
Z_lm_R_squared_K_L = Z_lm_K_L$r_squared, Z_lm_R_squared_K_L = map_dbl(Z_lm_K_L, "r_squared"),
Z_lm_Score_K_L = Z_lm_K_L$score, Z_lm_Score_K_L = map_dbl(Z_lm_K_L, "score"),
Z_lm_intercept_r_L = Z_lm_r_L$intercept, Z_lm_intercept_r_L = map_dbl(Z_lm_r_L, "intercept"),
Z_lm_slope_r_L = Z_lm_r_L$slope, Z_lm_slope_r_L = map_dbl(Z_lm_r_L, "slope"),
Z_lm_R_squared_r_L = Z_lm_r_L$r_squared, Z_lm_R_squared_r_L = map_dbl(Z_lm_r_L, "r_squared"),
Z_lm_Score_r_L = Z_lm_r_L$score, Z_lm_Score_r_L = map_dbl(Z_lm_r_L, "score"),
Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L$intercept, Z_lm_intercept_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "intercept"),
Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L$slope, Z_lm_slope_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "slope"),
Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L$r_squared, Z_lm_R_squared_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "r_squared"),
Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L$score, Z_lm_Score_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "score"),
Z_lm_intercept_R_r_K = Z_lm_R_r_K$intercept, Z_lm_intercept_R_r_K = map_dbl(Z_lm_R_r_K, "intercept"),
Z_lm_slope_R_r_K = Z_lm_R_r_K$slope, Z_lm_slope_R_r_K = map_dbl(Z_lm_R_r_K, "slope"),
Z_lm_R_squared_R_r_K = Z_lm_R_r_K$r_squared, Z_lm_R_squared_R_r_K = map_dbl(Z_lm_R_r_K, "r_squared"),
Z_lm_Score_R_r_K = Z_lm_R_r_K$score, Z_lm_Score_R_r_K = map_dbl(Z_lm_R_r_K, "score"),
Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K$intercept, Z_lm_intercept_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "intercept"),
Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K$slope, Z_lm_slope_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "slope"),
Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K$r_squared, Z_lm_R_squared_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "r_squared"),
Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K$score, Z_lm_Score_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "score"),
Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r$intercept, Z_lm_intercept_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "intercept"),
Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r$slope, Z_lm_slope_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "slope"),
Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r$r_squared, Z_lm_R_squared_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "r_squared"),
Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r$score Z_lm_Score_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "score")
) %>% ) %>%
# Drop the lm objects and keep only the lm results
select( select(
-lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L, -lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,
-Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r -Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r)
)
} # end deletion-specific block } # end deletion-specific block
# Create the final calculations and interactions dataframes with only required columns for csv output # Create the final calculations and interactions dataframes with only required columns for csv output