Add correlation lms to calculate_interaction_scores()
This commit is contained in:
@@ -217,6 +217,22 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
group_vars <- c("OrfRep", "Gene", "Drug")
|
||||
}
|
||||
|
||||
# This is a helper function to perform a linear regression and extract coefficients
|
||||
perform_lm <- function(data, response, predictor, max_conc) {
|
||||
valid_data <- data %>% filter(!is.na(.data[[response]]))
|
||||
if (nrow(valid_data) > 1) {
|
||||
model <- lm(.data[[response]] ~ .data[[predictor]], data = valid_data)
|
||||
list(
|
||||
intercept = coef(model)[1],
|
||||
slope = coef(model)[2],
|
||||
r_squared = summary(model)$r.squared,
|
||||
score = max_conc * coef(model)[2] + coef(model)[1]
|
||||
)
|
||||
} else {
|
||||
list(intercept = NA, slope = NA, r_squared = NA, score = NA)
|
||||
}
|
||||
}
|
||||
|
||||
# Calculate WT statistics from df_bg
|
||||
wt_stats <- df_bg %>%
|
||||
group_by(across(all_of(bg_group_vars))) %>%
|
||||
@@ -300,41 +316,32 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
ungroup() %>% # Ungroup before group_modify
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
group_modify(~ {
|
||||
lm_L <- perform_lm(.x, "Delta_L", "conc_num_factor", max_conc)
|
||||
lm_K <- perform_lm(.x, "Delta_K", "conc_num_factor", max_conc)
|
||||
lm_r <- perform_lm(.x, "Delta_r", "conc_num_factor", max_conc)
|
||||
lm_AUC <- perform_lm(.x, "Delta_AUC", "conc_num_factor", max_conc)
|
||||
|
||||
# Filter each column for valid data or else linear modeling will fail
|
||||
valid_data_L <- .x %>% filter(!is.na(Delta_L))
|
||||
valid_data_K <- .x %>% filter(!is.na(Delta_K))
|
||||
valid_data_r <- .x %>% filter(!is.na(Delta_r))
|
||||
valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
|
||||
|
||||
# Perform linear modeling
|
||||
lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
|
||||
lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
|
||||
lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
|
||||
lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
|
||||
|
||||
# Extract coefficients for calculations and plotting
|
||||
.x %>%
|
||||
mutate(
|
||||
lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
|
||||
lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
|
||||
R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
|
||||
lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
|
||||
lm_intercept_L = lm_L$intercept,
|
||||
lm_slope_L = lm_L$slope,
|
||||
R_Squared_L = lm_L$r_squared,
|
||||
lm_Score_L = lm_L$score,
|
||||
|
||||
lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
|
||||
lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
|
||||
R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
|
||||
lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
|
||||
lm_intercept_K = lm_K$intercept,
|
||||
lm_slope_K = lm_K$slope,
|
||||
R_Squared_K = lm_K$r_squared,
|
||||
lm_Score_K = lm_K$score,
|
||||
|
||||
lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
|
||||
lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
|
||||
R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
|
||||
lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
|
||||
lm_intercept_r = lm_r$intercept,
|
||||
lm_slope_r = lm_r$slope,
|
||||
R_Squared_r = lm_r$r_squared,
|
||||
lm_Score_r = lm_r$score,
|
||||
|
||||
lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
|
||||
lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
|
||||
R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
|
||||
lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
|
||||
lm_intercept_AUC = lm_AUC$intercept,
|
||||
lm_slope_AUC = lm_AUC$slope,
|
||||
R_Squared_AUC = lm_AUC$r_squared,
|
||||
lm_Score_AUC = lm_AUC$score
|
||||
)
|
||||
}) %>%
|
||||
ungroup()
|
||||
@@ -404,29 +411,42 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Z_Shift_r = first(Z_Shift_r),
|
||||
Z_Shift_AUC = first(Z_Shift_AUC),
|
||||
|
||||
# R Squared values
|
||||
R_Squared_L = first(R_Squared_L),
|
||||
R_Squared_K = first(R_Squared_K),
|
||||
R_Squared_r = first(R_Squared_r),
|
||||
R_Squared_AUC = first(R_Squared_AUC),
|
||||
|
||||
# lm intercepts
|
||||
lm_intercept_L = first(lm_intercept_L),
|
||||
lm_intercept_K = first(lm_intercept_K),
|
||||
lm_intercept_r = first(lm_intercept_r),
|
||||
lm_intercept_AUC = first(lm_intercept_AUC),
|
||||
|
||||
# lm slopes
|
||||
lm_slope_L = first(lm_slope_L),
|
||||
lm_slope_K = first(lm_slope_K),
|
||||
lm_slope_r = first(lm_slope_r),
|
||||
lm_slope_AUC = first(lm_slope_AUC),
|
||||
|
||||
# NG, DB, SM values
|
||||
NG = first(NG),
|
||||
DB = first(DB),
|
||||
SM = first(SM),
|
||||
.groups = "drop"
|
||||
) %>%
|
||||
arrange(desc(Z_lm_L), desc(NG))
|
||||
|
||||
# Deletion data ranking and linear modeling
|
||||
if (type == "deletion") {
|
||||
interactions <- interactions %>%
|
||||
mutate(
|
||||
Avg_Zscore_L_adjusted = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
|
||||
Avg_Zscore_K_adjusted = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
|
||||
Avg_Zscore_r_adjusted = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
|
||||
Avg_Zscore_AUC_adjusted = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
|
||||
Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
|
||||
Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
|
||||
Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
|
||||
Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC)
|
||||
) %>%
|
||||
mutate(
|
||||
Rank_L = rank(Avg_Zscore_L_adjusted),
|
||||
Rank_K = rank(Avg_Zscore_K_adjusted),
|
||||
Rank_r = rank(Avg_Zscore_r_adjusted),
|
||||
Rank_AUC = rank(Avg_Zscore_AUC_adjusted),
|
||||
Rank_lm_L = rank(Z_lm_L_adjusted),
|
||||
Rank_lm_K = rank(Z_lm_K_adjusted),
|
||||
Rank_lm_r = rank(Z_lm_r_adjusted),
|
||||
Rank_lm_AUC = rank(Z_lm_AUC_adjusted)
|
||||
) %>%
|
||||
mutate(
|
||||
lm_R_squared_rank_L = summary(lm(Rank_lm_L ~ Rank_L, data = .))$r.squared,
|
||||
lm_R_squared_rank_K = summary(lm(Rank_lm_K ~ Rank_K, data = .))$r.squared,
|
||||
lm_R_squared_rank_r = summary(lm(Rank_lm_r ~ Rank_r, data = .))$r.squared,
|
||||
lm_R_squared_rank_AUC = summary(lm(Rank_lm_AUC ~ Rank_AUC, data = .))$r.squared
|
||||
)
|
||||
|
||||
# Add overlap threshold categories based on Z-lm and Avg-Z scores
|
||||
@@ -443,16 +463,64 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore",
|
||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
||||
TRUE ~ "No Effect"
|
||||
),
|
||||
)) %>%
|
||||
group_modify(~ {
|
||||
|
||||
# For rank plots
|
||||
lm_L <- perform_lm(.x, "Z_lm_L", "Avg_Zscore_L", max_conc)
|
||||
lm_K <- perform_lm(.x, "Z_lm_K", "Avg_Zscore_K", max_conc)
|
||||
lm_r <- perform_lm(.x, "Z_lm_r", "Avg_Zscore_r", max_conc)
|
||||
lm_AUC <- perform_lm(.x, "Z_lm_AUC", "Avg_Zscore_AUC", max_conc)
|
||||
|
||||
# For correlation plots
|
||||
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
||||
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
||||
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
||||
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
||||
)
|
||||
Z_lm_K_L <- perform_lm(.x, "Z_lm_K", "Z_lm_L", max_conc)
|
||||
Z_lm_r_L <- perform_lm(.x, "Z_lm_r", "Z_lm_L", max_conc)
|
||||
Z_lm_R_AUC_L <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_L", max_conc)
|
||||
Z_lm_R_r_K <- perform_lm(.x, "Z_lm_R_r", "Z_lm_K", max_conc)
|
||||
Z_lm_R_AUC_K <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_K", max_conc)
|
||||
Z_lm_R_AUC_r <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_r", max_conc)
|
||||
|
||||
# Creating the final calculations and interactions dataframes with only required columns for csv output
|
||||
.x %>%
|
||||
mutate(
|
||||
# For rank plots
|
||||
lm_R_squared_L = lm_L$r.squared,
|
||||
lm_R_squared_K = lm_K$r.squared,
|
||||
lm_R_squared_r = lm_r$r.squared,
|
||||
lm_R_squared_AUC = lm_AUC$r.squared,
|
||||
|
||||
# For correlation plots
|
||||
Z_lm_R_squared_K_L = Z_lm_K_L$r.squared,
|
||||
Z_lm_R_squared_r_L = Z_lm_r_L$r.squared,
|
||||
Z_lm_R_squared_AUC_L = Z_lm_R_AUC_L$r.squared,
|
||||
Z_lm_R_squared_r_K = Z_lm_R_r_K$r.squared,
|
||||
Z_lm_R_squared_AUC_K = Z_lm_R_AUC_K$r.squared,
|
||||
Z_lm_R_squared_AUC_r = Z_lm_R_AUC_r$r.squared,
|
||||
|
||||
Z_lm_intercept_K_L = Z_lm_K_L$intercept,
|
||||
Z_lm_intercept_r_L = Z_lm_r_L$intercept,
|
||||
Z_lm_intercept_AUC_L = Z_lm_R_AUC_L$intercept,
|
||||
Z_lm_intercept_r_K = Z_lm_R_r_K$intercept,
|
||||
Z_lm_intercept_AUC_K = Z_lm_R_AUC_K$intercept,
|
||||
Z_lm_intercept_AUC_r = Z_lm_R_AUC_r$intercept,
|
||||
|
||||
Z_lm_slope_K_L = Z_lm_K_L$slope,
|
||||
Z_lm_slope_r_L = Z_lm_r_L$slope,
|
||||
Z_lm_slope_AUC_L = Z_lm_R_AUC_L$slope,
|
||||
Z_lm_slope_r_K = Z_lm_R_r_K$slope,
|
||||
Z_lm_slope_AUC_K = Z_lm_R_AUC_K$slope,
|
||||
Z_lm_slope_AUC_r = Z_lm_R_AUC_r$slope,
|
||||
|
||||
Z_lm_Score_K_L = Z_lm_K_L$score,
|
||||
Z_lm_Score_r_L = Z_lm_r_L$score,
|
||||
Z_lm_Score_AUC_L = Z_lm_R_AUC_L$score,
|
||||
Z_lm_Score_r_K = Z_lm_R_r_K$score,
|
||||
Z_lm_Score_AUC_K = Z_lm_R_AUC_K$score,
|
||||
Z_lm_Score_AUC_r = Z_lm_R_AUC_r$score
|
||||
)
|
||||
})
|
||||
} # end deletion-specific block
|
||||
|
||||
# Create the final calculations and interactions dataframes with only required columns for csv output
|
||||
df_calculations <- calculations %>%
|
||||
select(
|
||||
all_of(group_vars),
|
||||
@@ -484,7 +552,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
lm_slope_L, lm_slope_K, lm_slope_r, lm_slope_AUC, Overlap
|
||||
)
|
||||
|
||||
# Join calculations and interactions to avoid dimension mismatch
|
||||
# Avoid column collision on left join for overlapping variables
|
||||
calculations_no_overlap <- calculations %>%
|
||||
select(-any_of(c("DB", "NG", "SM",
|
||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||
@@ -1091,22 +1159,12 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
||||
))
|
||||
}
|
||||
|
||||
generate_rank_plot_configs <- function(df, is_lm = FALSE, adjust = FALSE, filter_na = FALSE, overlap_color = FALSE) {
|
||||
generate_rank_plot_configs <- function(df, is_lm = FALSE, filter_na = FALSE, overlap_color = FALSE) {
|
||||
sd_bands <- c(1, 2, 3)
|
||||
plot_configs <- list()
|
||||
|
||||
variables <- c("L", "K")
|
||||
|
||||
# Adjust (if necessary) and rank columns
|
||||
for (variable in variables) {
|
||||
if (adjust) {
|
||||
df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]])
|
||||
df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]])
|
||||
}
|
||||
df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
|
||||
df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep")
|
||||
}
|
||||
|
||||
# Helper function to create a rank plot configuration
|
||||
create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, filter_na, with_annotations = TRUE) {
|
||||
num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||
@@ -1578,9 +1636,9 @@ main <- function() {
|
||||
write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||
|
||||
message("Generating reference interaction plots")
|
||||
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||
# message("Generating reference interaction plots")
|
||||
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
||||
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||
|
||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||
df_deletion <- df_na_stats %>% # formerly X2
|
||||
@@ -1648,7 +1706,6 @@ main <- function() {
|
||||
# rank_plot_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = FALSE,
|
||||
# adjust = TRUE
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
@@ -1657,7 +1714,6 @@ main <- function() {
|
||||
# rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = TRUE,
|
||||
# adjust = TRUE
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
@@ -1666,7 +1722,6 @@ main <- function() {
|
||||
# rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = FALSE,
|
||||
# adjust = FALSE,
|
||||
# filter_na = TRUE,
|
||||
# overlap_color = TRUE
|
||||
# )
|
||||
@@ -1677,7 +1732,6 @@ main <- function() {
|
||||
# rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = TRUE,
|
||||
# adjust = FALSE,
|
||||
# filter_na = TRUE,
|
||||
# overlap_color = TRUE
|
||||
# )
|
||||
|
||||
Reference in New Issue
Block a user