|
@@ -217,6 +217,22 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
group_vars <- c("OrfRep", "Gene", "Drug")
|
|
|
}
|
|
|
|
|
|
+ # This is a helper function to perform a linear regression and extract coefficients
|
|
|
+ perform_lm <- function(data, response, predictor, max_conc) {
|
|
|
+ valid_data <- data %>% filter(!is.na(.data[[response]]))
|
|
|
+ if (nrow(valid_data) > 1) {
|
|
|
+ model <- lm(.data[[response]] ~ .data[[predictor]], data = valid_data)
|
|
|
+ list(
|
|
|
+ intercept = coef(model)[1],
|
|
|
+ slope = coef(model)[2],
|
|
|
+ r_squared = summary(model)$r.squared,
|
|
|
+ score = max_conc * coef(model)[2] + coef(model)[1]
|
|
|
+ )
|
|
|
+ } else {
|
|
|
+ list(intercept = NA, slope = NA, r_squared = NA, score = NA)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
# Calculate WT statistics from df_bg
|
|
|
wt_stats <- df_bg %>%
|
|
|
group_by(across(all_of(bg_group_vars))) %>%
|
|
@@ -300,41 +316,32 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
ungroup() %>% # Ungroup before group_modify
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
group_modify(~ {
|
|
|
+ lm_L <- perform_lm(.x, "Delta_L", "conc_num_factor", max_conc)
|
|
|
+ lm_K <- perform_lm(.x, "Delta_K", "conc_num_factor", max_conc)
|
|
|
+ lm_r <- perform_lm(.x, "Delta_r", "conc_num_factor", max_conc)
|
|
|
+ lm_AUC <- perform_lm(.x, "Delta_AUC", "conc_num_factor", max_conc)
|
|
|
|
|
|
- # Filter each column for valid data or else linear modeling will fail
|
|
|
- valid_data_L <- .x %>% filter(!is.na(Delta_L))
|
|
|
- valid_data_K <- .x %>% filter(!is.na(Delta_K))
|
|
|
- valid_data_r <- .x %>% filter(!is.na(Delta_r))
|
|
|
- valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
|
|
|
-
|
|
|
- # Perform linear modeling
|
|
|
- lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
|
|
|
- lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
|
|
|
- lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
|
|
|
- lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
|
|
|
-
|
|
|
- # Extract coefficients for calculations and plotting
|
|
|
.x %>%
|
|
|
mutate(
|
|
|
- lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
|
|
|
- lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
|
|
|
- R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
|
|
|
- lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
|
|
|
-
|
|
|
- lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
|
|
|
- lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
|
|
|
- R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
|
|
|
- lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
|
|
|
-
|
|
|
- lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
|
|
|
- lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
|
|
|
- R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
|
|
|
- lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
|
|
|
-
|
|
|
- lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
|
|
|
- lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
|
|
|
- R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
|
|
|
- lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
|
|
|
+ lm_intercept_L = lm_L$intercept,
|
|
|
+ lm_slope_L = lm_L$slope,
|
|
|
+ R_Squared_L = lm_L$r_squared,
|
|
|
+ lm_Score_L = lm_L$score,
|
|
|
+
|
|
|
+ lm_intercept_K = lm_K$intercept,
|
|
|
+ lm_slope_K = lm_K$slope,
|
|
|
+ R_Squared_K = lm_K$r_squared,
|
|
|
+ lm_Score_K = lm_K$score,
|
|
|
+
|
|
|
+ lm_intercept_r = lm_r$intercept,
|
|
|
+ lm_slope_r = lm_r$slope,
|
|
|
+ R_Squared_r = lm_r$r_squared,
|
|
|
+ lm_Score_r = lm_r$score,
|
|
|
+
|
|
|
+ lm_intercept_AUC = lm_AUC$intercept,
|
|
|
+ lm_slope_AUC = lm_AUC$slope,
|
|
|
+ R_Squared_AUC = lm_AUC$r_squared,
|
|
|
+ lm_Score_AUC = lm_AUC$score
|
|
|
)
|
|
|
}) %>%
|
|
|
ungroup()
|
|
@@ -404,55 +411,116 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
|
|
|
|
|
- # R Squared values
|
|
|
- R_Squared_L = first(R_Squared_L),
|
|
|
- R_Squared_K = first(R_Squared_K),
|
|
|
- R_Squared_r = first(R_Squared_r),
|
|
|
- R_Squared_AUC = first(R_Squared_AUC),
|
|
|
-
|
|
|
- # lm intercepts
|
|
|
- lm_intercept_L = first(lm_intercept_L),
|
|
|
- lm_intercept_K = first(lm_intercept_K),
|
|
|
- lm_intercept_r = first(lm_intercept_r),
|
|
|
- lm_intercept_AUC = first(lm_intercept_AUC),
|
|
|
-
|
|
|
- # lm slopes
|
|
|
- lm_slope_L = first(lm_slope_L),
|
|
|
- lm_slope_K = first(lm_slope_K),
|
|
|
- lm_slope_r = first(lm_slope_r),
|
|
|
- lm_slope_AUC = first(lm_slope_AUC),
|
|
|
-
|
|
|
# NG, DB, SM values
|
|
|
NG = first(NG),
|
|
|
DB = first(DB),
|
|
|
SM = first(SM),
|
|
|
.groups = "drop"
|
|
|
- )
|
|
|
+ ) %>%
|
|
|
+ arrange(desc(Z_lm_L), desc(NG))
|
|
|
+
|
|
|
+ # Deletion data ranking and linear modeling
|
|
|
+ if (type == "deletion") {
|
|
|
+ interactions <- interactions %>%
|
|
|
+ mutate(
|
|
|
+ Avg_Zscore_L_adjusted = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
|
|
|
+ Avg_Zscore_K_adjusted = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
|
|
|
+ Avg_Zscore_r_adjusted = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
|
|
|
+ Avg_Zscore_AUC_adjusted = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
|
|
|
+ Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
|
|
|
+ Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
|
|
|
+ Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
|
|
|
+ Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC)
|
|
|
+ ) %>%
|
|
|
+ mutate(
|
|
|
+ Rank_L = rank(Avg_Zscore_L_adjusted),
|
|
|
+ Rank_K = rank(Avg_Zscore_K_adjusted),
|
|
|
+ Rank_r = rank(Avg_Zscore_r_adjusted),
|
|
|
+ Rank_AUC = rank(Avg_Zscore_AUC_adjusted),
|
|
|
+ Rank_lm_L = rank(Z_lm_L_adjusted),
|
|
|
+ Rank_lm_K = rank(Z_lm_K_adjusted),
|
|
|
+ Rank_lm_r = rank(Z_lm_r_adjusted),
|
|
|
+ Rank_lm_AUC = rank(Z_lm_AUC_adjusted)
|
|
|
+ ) %>%
|
|
|
+ mutate(
|
|
|
+ lm_R_squared_rank_L = summary(lm(Rank_lm_L ~ Rank_L, data = .))$r.squared,
|
|
|
+ lm_R_squared_rank_K = summary(lm(Rank_lm_K ~ Rank_K, data = .))$r.squared,
|
|
|
+ lm_R_squared_rank_r = summary(lm(Rank_lm_r ~ Rank_r, data = .))$r.squared,
|
|
|
+ lm_R_squared_rank_AUC = summary(lm(Rank_lm_AUC ~ Rank_AUC, data = .))$r.squared
|
|
|
+ )
|
|
|
|
|
|
- # Add overlap threshold categories based on Z-lm and Avg-Z scores
|
|
|
- interactions <- interactions %>%
|
|
|
- filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
|
|
- mutate(
|
|
|
- Overlap = case_when(
|
|
|
- Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
|
|
- Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
|
|
|
- Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
|
|
|
- Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
|
|
|
- Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
|
|
|
- Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
|
|
|
- Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore",
|
|
|
- Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
|
|
- TRUE ~ "No Effect"
|
|
|
- ),
|
|
|
+ # Add overlap threshold categories based on Z-lm and Avg-Z scores
|
|
|
+ interactions <- interactions %>%
|
|
|
+ filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
|
|
+ mutate(
|
|
|
+ Overlap = case_when(
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
|
|
|
+ Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
|
|
|
+ Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore",
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
|
|
+ TRUE ~ "No Effect"
|
|
|
+ )) %>%
|
|
|
+ group_modify(~ {
|
|
|
|
|
|
- # For correlation plots
|
|
|
- lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
|
|
- lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
|
|
- lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
|
|
- lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
|
|
- )
|
|
|
+ # For rank plots
|
|
|
+ lm_L <- perform_lm(.x, "Z_lm_L", "Avg_Zscore_L", max_conc)
|
|
|
+ lm_K <- perform_lm(.x, "Z_lm_K", "Avg_Zscore_K", max_conc)
|
|
|
+ lm_r <- perform_lm(.x, "Z_lm_r", "Avg_Zscore_r", max_conc)
|
|
|
+ lm_AUC <- perform_lm(.x, "Z_lm_AUC", "Avg_Zscore_AUC", max_conc)
|
|
|
+
|
|
|
+ # For correlation plots
|
|
|
+ Z_lm_K_L <- perform_lm(.x, "Z_lm_K", "Z_lm_L", max_conc)
|
|
|
+ Z_lm_r_L <- perform_lm(.x, "Z_lm_r", "Z_lm_L", max_conc)
|
|
|
+ Z_lm_R_AUC_L <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_L", max_conc)
|
|
|
+ Z_lm_R_r_K <- perform_lm(.x, "Z_lm_R_r", "Z_lm_K", max_conc)
|
|
|
+ Z_lm_R_AUC_K <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_K", max_conc)
|
|
|
+ Z_lm_R_AUC_r <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_r", max_conc)
|
|
|
+
|
|
|
+ .x %>%
|
|
|
+ mutate(
|
|
|
+ # For rank plots
|
|
|
+ lm_R_squared_L = lm_L$r.squared,
|
|
|
+ lm_R_squared_K = lm_K$r.squared,
|
|
|
+ lm_R_squared_r = lm_r$r.squared,
|
|
|
+ lm_R_squared_AUC = lm_AUC$r.squared,
|
|
|
+
|
|
|
+ # For correlation plots
|
|
|
+ Z_lm_R_squared_K_L = Z_lm_K_L$r.squared,
|
|
|
+ Z_lm_R_squared_r_L = Z_lm_r_L$r.squared,
|
|
|
+ Z_lm_R_squared_AUC_L = Z_lm_R_AUC_L$r.squared,
|
|
|
+ Z_lm_R_squared_r_K = Z_lm_R_r_K$r.squared,
|
|
|
+ Z_lm_R_squared_AUC_K = Z_lm_R_AUC_K$r.squared,
|
|
|
+ Z_lm_R_squared_AUC_r = Z_lm_R_AUC_r$r.squared,
|
|
|
+
|
|
|
+ Z_lm_intercept_K_L = Z_lm_K_L$intercept,
|
|
|
+ Z_lm_intercept_r_L = Z_lm_r_L$intercept,
|
|
|
+ Z_lm_intercept_AUC_L = Z_lm_R_AUC_L$intercept,
|
|
|
+ Z_lm_intercept_r_K = Z_lm_R_r_K$intercept,
|
|
|
+ Z_lm_intercept_AUC_K = Z_lm_R_AUC_K$intercept,
|
|
|
+ Z_lm_intercept_AUC_r = Z_lm_R_AUC_r$intercept,
|
|
|
+
|
|
|
+ Z_lm_slope_K_L = Z_lm_K_L$slope,
|
|
|
+ Z_lm_slope_r_L = Z_lm_r_L$slope,
|
|
|
+ Z_lm_slope_AUC_L = Z_lm_R_AUC_L$slope,
|
|
|
+ Z_lm_slope_r_K = Z_lm_R_r_K$slope,
|
|
|
+ Z_lm_slope_AUC_K = Z_lm_R_AUC_K$slope,
|
|
|
+ Z_lm_slope_AUC_r = Z_lm_R_AUC_r$slope,
|
|
|
+
|
|
|
+ Z_lm_Score_K_L = Z_lm_K_L$score,
|
|
|
+ Z_lm_Score_r_L = Z_lm_r_L$score,
|
|
|
+ Z_lm_Score_AUC_L = Z_lm_R_AUC_L$score,
|
|
|
+ Z_lm_Score_r_K = Z_lm_R_r_K$score,
|
|
|
+ Z_lm_Score_AUC_K = Z_lm_R_AUC_K$score,
|
|
|
+ Z_lm_Score_AUC_r = Z_lm_R_AUC_r$score
|
|
|
+ )
|
|
|
+ })
|
|
|
+ } # end deletion-specific block
|
|
|
|
|
|
- # Creating the final calculations and interactions dataframes with only required columns for csv output
|
|
|
+ # Create the final calculations and interactions dataframes with only required columns for csv output
|
|
|
df_calculations <- calculations %>%
|
|
|
select(
|
|
|
all_of(group_vars),
|
|
@@ -484,7 +552,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
lm_slope_L, lm_slope_K, lm_slope_r, lm_slope_AUC, Overlap
|
|
|
)
|
|
|
|
|
|
- # Join calculations and interactions to avoid dimension mismatch
|
|
|
+ # Avoid column collision on left join for overlapping variables
|
|
|
calculations_no_overlap <- calculations %>%
|
|
|
select(-any_of(c("DB", "NG", "SM",
|
|
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
|
@@ -1091,22 +1159,12 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
|
|
))
|
|
|
}
|
|
|
|
|
|
-generate_rank_plot_configs <- function(df, is_lm = FALSE, adjust = FALSE, filter_na = FALSE, overlap_color = FALSE) {
|
|
|
+generate_rank_plot_configs <- function(df, is_lm = FALSE, filter_na = FALSE, overlap_color = FALSE) {
|
|
|
sd_bands <- c(1, 2, 3)
|
|
|
plot_configs <- list()
|
|
|
|
|
|
variables <- c("L", "K")
|
|
|
|
|
|
- # Adjust (if necessary) and rank columns
|
|
|
- for (variable in variables) {
|
|
|
- if (adjust) {
|
|
|
- df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]])
|
|
|
- df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]])
|
|
|
- }
|
|
|
- df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
|
|
|
- df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep")
|
|
|
- }
|
|
|
-
|
|
|
# Helper function to create a rank plot configuration
|
|
|
create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, filter_na, with_annotations = TRUE) {
|
|
|
num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
@@ -1578,9 +1636,9 @@ main <- function() {
|
|
|
write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
|
|
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
|
|
|
|
|
- message("Generating reference interaction plots")
|
|
|
- reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
|
|
- generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
|
|
+ # message("Generating reference interaction plots")
|
|
|
+ # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
|
|
+ # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
|
|
|
|
|
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
|
|
df_deletion <- df_na_stats %>% # formerly X2
|
|
@@ -1648,7 +1706,6 @@ main <- function() {
|
|
|
# rank_plot_configs <- generate_rank_plot_configs(
|
|
|
# df_interactions,
|
|
|
# is_lm = FALSE,
|
|
|
- # adjust = TRUE
|
|
|
# )
|
|
|
# generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
|
|
|
# page_width = 18, page_height = 12)
|
|
@@ -1657,7 +1714,6 @@ main <- function() {
|
|
|
# rank_lm_plot_configs <- generate_rank_plot_configs(
|
|
|
# df_interactions,
|
|
|
# is_lm = TRUE,
|
|
|
- # adjust = TRUE
|
|
|
# )
|
|
|
# generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
|
|
|
# page_width = 18, page_height = 12)
|
|
@@ -1666,7 +1722,6 @@ main <- function() {
|
|
|
# rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
|
# df_interactions,
|
|
|
# is_lm = FALSE,
|
|
|
- # adjust = FALSE,
|
|
|
# filter_na = TRUE,
|
|
|
# overlap_color = TRUE
|
|
|
# )
|
|
@@ -1677,7 +1732,6 @@ main <- function() {
|
|
|
# rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
|
|
# df_interactions,
|
|
|
# is_lm = TRUE,
|
|
|
- # adjust = FALSE,
|
|
|
# filter_na = TRUE,
|
|
|
# overlap_color = TRUE
|
|
|
# )
|