Simplify calculate_interaction_scores()
This commit is contained in:
@@ -215,7 +215,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
WT_sd_r = sd_r,
|
WT_sd_r = sd_r,
|
||||||
WT_sd_AUC = sd_AUC
|
WT_sd_AUC = sd_AUC
|
||||||
) %>%
|
) %>%
|
||||||
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
|
group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
|
||||||
mutate(
|
mutate(
|
||||||
N = sum(!is.na(L)),
|
N = sum(!is.na(L)),
|
||||||
NG = sum(NG, na.rm = TRUE),
|
NG = sum(NG, na.rm = TRUE),
|
||||||
@@ -229,18 +229,20 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
sd = ~sd(., na.rm = TRUE),
|
sd = ~sd(., na.rm = TRUE),
|
||||||
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
||||||
), .names = "{.fn}_{.col}")
|
), .names = "{.fn}_{.col}")
|
||||||
)
|
) %>%
|
||||||
|
ungroup()
|
||||||
|
|
||||||
stats <- stats %>%
|
stats <- stats %>%
|
||||||
|
group_by(OrfRep, Gene, num) %>%
|
||||||
mutate(
|
mutate(
|
||||||
Raw_Shift_L = mean_L[[1]] - bg_means$L,
|
Raw_Shift_L = first(mean_L) - bg_means$L,
|
||||||
Raw_Shift_K = mean_K[[1]] - bg_means$K,
|
Raw_Shift_K = first(mean_K) - bg_means$K,
|
||||||
Raw_Shift_r = mean_r[[1]] - bg_means$r,
|
Raw_Shift_r = first(mean_r) - bg_means$r,
|
||||||
Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
|
Raw_Shift_AUC = first(mean_AUC) - bg_means$AUC,
|
||||||
Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L,
|
Z_Shift_L = first(Raw_Shift_L) / bg_sd$L,
|
||||||
Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K,
|
Z_Shift_K = first(Raw_Shift_K) / bg_sd$K,
|
||||||
Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r,
|
Z_Shift_r = first(Raw_Shift_r) / bg_sd$r,
|
||||||
Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC
|
Z_Shift_AUC = first(Raw_Shift_AUC) / bg_sd$AUC
|
||||||
)
|
)
|
||||||
|
|
||||||
stats <- stats %>%
|
stats <- stats %>%
|
||||||
@@ -270,86 +272,53 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
Zscore_K = Delta_K / WT_sd_K,
|
Zscore_K = Delta_K / WT_sd_K,
|
||||||
Zscore_r = Delta_r / WT_sd_r,
|
Zscore_r = Delta_r / WT_sd_r,
|
||||||
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||||
) %>%
|
|
||||||
ungroup()
|
|
||||||
|
|
||||||
# Create linear models with error handling for missing/insufficient data
|
|
||||||
# This part is a PITA so best to contain it in its own function
|
|
||||||
calculate_lm_values <- function(y, x) {
|
|
||||||
if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) {
|
|
||||||
# Suppress warnings only for perfect fits or similar issues
|
|
||||||
model <- suppressWarnings(lm(y ~ x))
|
|
||||||
coefficients <- coef(model)
|
|
||||||
r_squared <- tryCatch({
|
|
||||||
summary(model)$r.squared
|
|
||||||
}, warning = function(w) {
|
|
||||||
NA # Set r-squared to NA if there's a warning
|
|
||||||
})
|
|
||||||
return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared))
|
|
||||||
} else {
|
|
||||||
return(list(intercept = NA, slope = NA, r_squared = NA))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lms <- stats %>%
|
|
||||||
group_by(across(all_of(group_vars))) %>%
|
|
||||||
reframe(
|
|
||||||
lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)),
|
|
||||||
lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)),
|
|
||||||
lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)),
|
|
||||||
lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor))
|
|
||||||
)
|
)
|
||||||
|
|
||||||
lms <- lms %>%
|
|
||||||
mutate(
|
|
||||||
lm_L_intercept = sapply(lm_L, `[[`, "intercept"),
|
|
||||||
lm_L_slope = sapply(lm_L, `[[`, "slope"),
|
|
||||||
lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"),
|
|
||||||
lm_K_intercept = sapply(lm_K, `[[`, "intercept"),
|
|
||||||
lm_K_slope = sapply(lm_K, `[[`, "slope"),
|
|
||||||
lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"),
|
|
||||||
lm_r_intercept = sapply(lm_r, `[[`, "intercept"),
|
|
||||||
lm_r_slope = sapply(lm_r, `[[`, "slope"),
|
|
||||||
lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"),
|
|
||||||
lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"),
|
|
||||||
lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"),
|
|
||||||
lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared")
|
|
||||||
) %>%
|
|
||||||
select(-lm_L, -lm_K, -lm_r, -lm_AUC)
|
|
||||||
|
|
||||||
stats <- stats %>%
|
stats <- stats %>%
|
||||||
left_join(lms, by = group_vars) %>%
|
|
||||||
mutate(
|
mutate(
|
||||||
lm_Score_L = lm_L_slope * max_conc + lm_L_intercept,
|
|
||||||
lm_Score_K = lm_K_slope * max_conc + lm_K_intercept,
|
|
||||||
lm_Score_r = lm_r_slope * max_conc + lm_r_intercept,
|
|
||||||
lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept,
|
|
||||||
R_Squared_L = lm_L_r_squared,
|
|
||||||
R_Squared_K = lm_K_r_squared,
|
|
||||||
R_Squared_r = lm_r_r_squared,
|
|
||||||
R_Squared_AUC = lm_AUC_r_squared,
|
|
||||||
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
||||||
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
||||||
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
||||||
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
|
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Calculate linear models and store in own df for now
|
||||||
|
lms <- stats %>%
|
||||||
|
reframe(
|
||||||
|
L = lm(Delta_L ~ conc_num_factor),
|
||||||
|
K = lm(Delta_K ~ conc_num_factor),
|
||||||
|
r = lm(Delta_r ~ conc_num_factor),
|
||||||
|
AUC = lm(Delta_AUC ~ conc_num_factor)
|
||||||
|
)
|
||||||
|
|
||||||
stats <- stats %>%
|
stats <- stats %>%
|
||||||
mutate(
|
mutate(
|
||||||
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
|
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
|
||||||
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
|
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
|
||||||
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
|
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
|
||||||
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
|
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
|
||||||
|
lm_Score_L = max_conc * coef(lms$L)[2] + coef(lms$L)[1],
|
||||||
|
lm_Score_K = max_conc * coef(lms$K)[2] + coef(lms$K)[1],
|
||||||
|
lm_Score_r = max_conc * coef(lms$r)[2] + coef(lms$r)[1],
|
||||||
|
lm_Score_AUC = max_conc * coef(lms$AUC)[2] + coef(lms$AUC)[1],
|
||||||
|
R_Squared_L = summary(lms$L)$r.squared,
|
||||||
|
R_Squared_K = summary(lms$K)$r.squared,
|
||||||
|
R_Squared_r = summary(lms$r)$r.squared,
|
||||||
|
R_Squared_AUC = summary(lms$AUC)$r.squared
|
||||||
|
)
|
||||||
|
|
||||||
|
stats <- stats %>%
|
||||||
|
mutate(
|
||||||
Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
|
Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
|
||||||
Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
|
Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
|
||||||
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
|
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
|
||||||
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
|
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
|
||||||
) %>%
|
)
|
||||||
ungroup()
|
|
||||||
|
|
||||||
# Declare column order for output
|
# Declare column order for output
|
||||||
calculations <- stats %>%
|
calculations <- stats %>%
|
||||||
select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
|
select(
|
||||||
|
"OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
|
||||||
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
||||||
"median_L", "median_K", "median_r", "median_AUC",
|
"median_L", "median_K", "median_r", "median_AUC",
|
||||||
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
||||||
@@ -364,7 +333,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
"NG", "SM", "DB")
|
"NG", "SM", "DB")
|
||||||
|
|
||||||
interactions <- stats %>%
|
interactions <- stats %>%
|
||||||
select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
|
select(
|
||||||
|
"OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
|
||||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||||
"lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
|
"lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
|
||||||
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
||||||
@@ -375,6 +345,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
arrange(desc(lm_Score_L)) %>%
|
arrange(desc(lm_Score_L)) %>%
|
||||||
arrange(desc(NG))
|
arrange(desc(NG))
|
||||||
|
|
||||||
|
print(df, n = 1)
|
||||||
|
print(calculations, n = 1)
|
||||||
df <- df %>% select(-any_of(setdiff(names(calculations), group_vars)))
|
df <- df %>% select(-any_of(setdiff(names(calculations), group_vars)))
|
||||||
df <- left_join(df, calculations, by = group_vars)
|
df <- left_join(df, calculations, by = group_vars)
|
||||||
# df <- df %>% select(-any_of(setdiff(names(interactions), group_vars)))
|
# df <- df %>% select(-any_of(setdiff(names(interactions), group_vars)))
|
||||||
|
|||||||
Reference in New Issue
Block a user