|
@@ -219,9 +219,9 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
|
|
|
# This is a helper function to perform a linear regression and extract coefficients
|
|
|
perform_lm <- function(data, response, predictor, max_conc) {
|
|
|
- valid_data <- data %>% filter(!is.na(.data[[response]]))
|
|
|
+ valid_data <- data %>% filter(!is.na(data[[response]]))
|
|
|
if (nrow(valid_data) > 1) {
|
|
|
- model <- lm(.data[[response]] ~ .data[[predictor]], data = valid_data)
|
|
|
+ model <- lm(data[[response]] ~ data[[predictor]], data = valid_data)
|
|
|
list(
|
|
|
intercept = coef(model)[1],
|
|
|
slope = coef(model)[2],
|
|
@@ -286,8 +286,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
NG_sum = sum(NG, na.rm = TRUE),
|
|
|
DB_sum = sum(DB, na.rm = TRUE),
|
|
|
SM_sum = sum(SM, na.rm = TRUE),
|
|
|
- num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
|
|
-
|
|
|
+
|
|
|
# Expected values
|
|
|
Exp_L = WT_L + Raw_Shift_L,
|
|
|
Exp_K = WT_K + Raw_Shift_K,
|
|
@@ -374,10 +373,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
|
|
|
lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
|
|
|
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
|
|
- lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
|
|
- ) %>%
|
|
|
- # Calculate Z-lm scores
|
|
|
- mutate(
|
|
|
+ lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
|
|
|
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
|
|
|
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
|
|
|
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
|
|
@@ -388,10 +384,24 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
interactions <- calculations %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
summarise(
|
|
|
- Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
- Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
- Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
|
|
|
- Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1),
|
|
|
+
|
|
|
+ num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
|
|
+
|
|
|
+ Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE),
|
|
|
+ Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE),
|
|
|
+ Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE),
|
|
|
+ Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
|
|
+
|
|
|
+ Avg_Zscore_L = Sum_Z_Score_L / first(num_non_removed_concs),
|
|
|
+ Avg_Zscore_K = Sum_Z_Score_K / first(num_non_removed_concs),
|
|
|
+ Avg_Zscore_r = Sum_Z_Score_r / first(num_non_removed_concs),
|
|
|
+ Avg_Zscore_AUC = Sum_Z_Score_AUC / first(num_non_removed_concs),
|
|
|
+
|
|
|
+ # R_Squared
|
|
|
+ R_Squared_L = first(R_Squared_L),
|
|
|
+ R_Squared_K = first(R_Squared_K),
|
|
|
+ R_Squared_r = first(R_Squared_r),
|
|
|
+ R_Squared_AUC = first(R_Squared_AUC),
|
|
|
|
|
|
# Interaction Z-scores
|
|
|
Z_lm_L = first(Z_lm_L),
|
|
@@ -411,13 +421,19 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
|
|
|
|
|
+ # Gene-Gene Interaction
|
|
|
+ lm_Score_L = first(lm_Score_L),
|
|
|
+ lm_Score_K = first(lm_Score_K),
|
|
|
+ lm_Score_r = first(lm_Score_r),
|
|
|
+ lm_Score_AUC = first(lm_Score_AUC),
|
|
|
+
|
|
|
# NG, DB, SM values
|
|
|
- NG = first(NG),
|
|
|
- DB = first(DB),
|
|
|
- SM = first(SM),
|
|
|
+ NG_sum_int = sum(NG),
|
|
|
+ DB_sum_int = sum(DB),
|
|
|
+ SM_sum_int = sum(SM),
|
|
|
.groups = "drop"
|
|
|
) %>%
|
|
|
- arrange(desc(Z_lm_L), desc(NG))
|
|
|
+ arrange(desc(Z_lm_L), desc(NG_sum_int))
|
|
|
|
|
|
# Deletion data ranking and linear modeling
|
|
|
if (type == "deletion") {
|
|
@@ -463,61 +479,78 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore",
|
|
|
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
|
|
TRUE ~ "No Effect"
|
|
|
- )) %>%
|
|
|
- group_modify(~ {
|
|
|
-
|
|
|
+ ),
|
|
|
+
|
|
|
# For rank plots
|
|
|
- lm_L <- perform_lm(.x, "Z_lm_L", "Avg_Zscore_L", max_conc)
|
|
|
- lm_K <- perform_lm(.x, "Z_lm_K", "Avg_Zscore_K", max_conc)
|
|
|
- lm_r <- perform_lm(.x, "Z_lm_r", "Avg_Zscore_r", max_conc)
|
|
|
- lm_AUC <- perform_lm(.x, "Z_lm_AUC", "Avg_Zscore_AUC", max_conc)
|
|
|
+ lm_L = perform_lm(., "Z_lm_L", "Avg_Zscore_L", max_conc),
|
|
|
+ lm_K = perform_lm(., "Z_lm_K", "Avg_Zscore_K", max_conc),
|
|
|
+ lm_r = perform_lm(., "Z_lm_r", "Avg_Zscore_r", max_conc),
|
|
|
+ lm_AUC = perform_lm(., "Z_lm_AUC", "Avg_Zscore_AUC", max_conc),
|
|
|
|
|
|
# For correlation plots
|
|
|
- Z_lm_K_L <- perform_lm(.x, "Z_lm_K", "Z_lm_L", max_conc)
|
|
|
- Z_lm_r_L <- perform_lm(.x, "Z_lm_r", "Z_lm_L", max_conc)
|
|
|
- Z_lm_R_AUC_L <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_L", max_conc)
|
|
|
- Z_lm_R_r_K <- perform_lm(.x, "Z_lm_R_r", "Z_lm_K", max_conc)
|
|
|
- Z_lm_R_AUC_K <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_K", max_conc)
|
|
|
- Z_lm_R_AUC_r <- perform_lm(.x, "Z_lm_R_AUC", "Z_lm_r", max_conc)
|
|
|
-
|
|
|
- .x %>%
|
|
|
- mutate(
|
|
|
- # For rank plots
|
|
|
- lm_R_squared_L = lm_L$r.squared,
|
|
|
- lm_R_squared_K = lm_K$r.squared,
|
|
|
- lm_R_squared_r = lm_r$r.squared,
|
|
|
- lm_R_squared_AUC = lm_AUC$r.squared,
|
|
|
-
|
|
|
- # For correlation plots
|
|
|
- Z_lm_R_squared_K_L = Z_lm_K_L$r.squared,
|
|
|
- Z_lm_R_squared_r_L = Z_lm_r_L$r.squared,
|
|
|
- Z_lm_R_squared_AUC_L = Z_lm_R_AUC_L$r.squared,
|
|
|
- Z_lm_R_squared_r_K = Z_lm_R_r_K$r.squared,
|
|
|
- Z_lm_R_squared_AUC_K = Z_lm_R_AUC_K$r.squared,
|
|
|
- Z_lm_R_squared_AUC_r = Z_lm_R_AUC_r$r.squared,
|
|
|
-
|
|
|
- Z_lm_intercept_K_L = Z_lm_K_L$intercept,
|
|
|
- Z_lm_intercept_r_L = Z_lm_r_L$intercept,
|
|
|
- Z_lm_intercept_AUC_L = Z_lm_R_AUC_L$intercept,
|
|
|
- Z_lm_intercept_r_K = Z_lm_R_r_K$intercept,
|
|
|
- Z_lm_intercept_AUC_K = Z_lm_R_AUC_K$intercept,
|
|
|
- Z_lm_intercept_AUC_r = Z_lm_R_AUC_r$intercept,
|
|
|
-
|
|
|
- Z_lm_slope_K_L = Z_lm_K_L$slope,
|
|
|
- Z_lm_slope_r_L = Z_lm_r_L$slope,
|
|
|
- Z_lm_slope_AUC_L = Z_lm_R_AUC_L$slope,
|
|
|
- Z_lm_slope_r_K = Z_lm_R_r_K$slope,
|
|
|
- Z_lm_slope_AUC_K = Z_lm_R_AUC_K$slope,
|
|
|
- Z_lm_slope_AUC_r = Z_lm_R_AUC_r$slope,
|
|
|
-
|
|
|
- Z_lm_Score_K_L = Z_lm_K_L$score,
|
|
|
- Z_lm_Score_r_L = Z_lm_r_L$score,
|
|
|
- Z_lm_Score_AUC_L = Z_lm_R_AUC_L$score,
|
|
|
- Z_lm_Score_r_K = Z_lm_R_r_K$score,
|
|
|
- Z_lm_Score_AUC_K = Z_lm_R_AUC_K$score,
|
|
|
- Z_lm_Score_AUC_r = Z_lm_R_AUC_r$score
|
|
|
- )
|
|
|
- })
|
|
|
+ Z_lm_K_L = perform_lm(., "Z_lm_K", "Z_lm_L", max_conc),
|
|
|
+ Z_lm_r_L = perform_lm(., "Z_lm_r", "Z_lm_L", max_conc),
|
|
|
+ Z_lm_R_AUC_L = perform_lm(., "Z_lm_AUC", "Z_lm_L", max_conc),
|
|
|
+ Z_lm_R_r_K = perform_lm(., "Z_lm_r", "Z_lm_K", max_conc),
|
|
|
+ Z_lm_R_AUC_K = perform_lm(., "Z_lm_AUC", "Z_lm_K", max_conc),
|
|
|
+ Z_lm_R_AUC_r = perform_lm(., "Z_lm_AUC", "Z_lm_r", max_conc),
|
|
|
+
|
|
|
+ # Extract coefficients and statistics for each model
|
|
|
+ lm_rank_intercept_L = lm_L$intercept,
|
|
|
+ lm_rank_slope_L = lm_L$slope,
|
|
|
+ R_Squared_L = lm_L$r_squared,
|
|
|
+ lm_Score_L = lm_L$score,
|
|
|
+
|
|
|
+ lm_intercept_K = lm_K$intercept,
|
|
|
+ lm_slope_K = lm_K$slope,
|
|
|
+ R_Squared_K = lm_K$r_squared,
|
|
|
+ lm_Score_K = lm_K$score,
|
|
|
+
|
|
|
+ lm_intercept_r = lm_r$intercept,
|
|
|
+ lm_slope_r = lm_r$slope,
|
|
|
+ R_Squared_r = lm_r$r_squared,
|
|
|
+ lm_Score_r = lm_r$score,
|
|
|
+
|
|
|
+ lm_intercept_AUC = lm_AUC$intercept,
|
|
|
+ lm_slope_AUC = lm_AUC$slope,
|
|
|
+ R_Squared_AUC = lm_AUC$r_squared,
|
|
|
+ lm_Score_AUC = lm_AUC$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_K_L = Z_lm_K_L$intercept,
|
|
|
+ Z_lm_slope_K_L = Z_lm_K_L$slope,
|
|
|
+ Z_lm_R_squared_K_L = Z_lm_K_L$r_squared,
|
|
|
+ Z_lm_Score_K_L = Z_lm_K_L$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_r_L = Z_lm_r_L$intercept,
|
|
|
+ Z_lm_slope_r_L = Z_lm_r_L$slope,
|
|
|
+ Z_lm_R_squared_r_L = Z_lm_r_L$r_squared,
|
|
|
+ Z_lm_Score_r_L = Z_lm_r_L$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L$intercept,
|
|
|
+ Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L$slope,
|
|
|
+ Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L$r_squared,
|
|
|
+ Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_R_r_K = Z_lm_R_r_K$intercept,
|
|
|
+ Z_lm_slope_R_r_K = Z_lm_R_r_K$slope,
|
|
|
+ Z_lm_R_squared_R_r_K = Z_lm_R_r_K$r_squared,
|
|
|
+ Z_lm_Score_R_r_K = Z_lm_R_r_K$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K$intercept,
|
|
|
+ Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K$slope,
|
|
|
+ Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K$r_squared,
|
|
|
+ Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K$score,
|
|
|
+
|
|
|
+ Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r$intercept,
|
|
|
+ Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r$slope,
|
|
|
+ Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r$r_squared,
|
|
|
+ Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r$score
|
|
|
+ ) %>%
|
|
|
+ # Drop the lm objects and keep only the lm results
|
|
|
+ select(
|
|
|
+ -lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,
|
|
|
+ -Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r
|
|
|
+ )
|
|
|
} # end deletion-specific block
|
|
|
|
|
|
# Create the final calculations and interactions dataframes with only required columns for csv output
|
|
@@ -536,35 +569,38 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|
|
Exp_L, Exp_K, Exp_r, Exp_AUC,
|
|
|
Delta_L, Delta_K, Delta_r, Delta_AUC,
|
|
|
mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC,
|
|
|
- Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
|
|
|
- )
|
|
|
+ Zscore_L, Zscore_K, Zscore_r, Zscore_AUC,
|
|
|
+ NG_sum, DB_sum, SM_sum
|
|
|
+ ) %>%
|
|
|
+ rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
|
|
|
|
|
|
df_interactions <- interactions %>%
|
|
|
select(
|
|
|
all_of(group_vars),
|
|
|
- NG, DB, SM,
|
|
|
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
|
|
+ Sum_Z_Score_L, Sum_Z_Score_K, Sum_Z_Score_r, Sum_Z_Score_AUC,
|
|
|
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
|
|
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
|
|
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
|
|
- lm_R_squared_L, lm_R_squared_K, lm_R_squared_r, lm_R_squared_AUC,
|
|
|
- lm_intercept_L, lm_intercept_K, lm_intercept_r, lm_intercept_AUC,
|
|
|
- lm_slope_L, lm_slope_K, lm_slope_r, lm_slope_AUC, Overlap
|
|
|
- )
|
|
|
+ lm_Score_L, lm_Score_K, lm_Score_r, lm_Score_AUC,
|
|
|
+ R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC,
|
|
|
+ NG_sum_int, DB_sum_int, SM_sum_int
|
|
|
+ ) %>%
|
|
|
+ rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
|
|
|
|
|
|
# Avoid column collision on left join for overlapping variables
|
|
|
calculations_no_overlap <- calculations %>%
|
|
|
select(-any_of(c("DB", "NG", "SM",
|
|
|
+ # Don't need these anywhere so easier to remove
|
|
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
|
|
+ "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
|
|
+ # we need these for the interactions but the original code has the same names in both datasets
|
|
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
|
|
- "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
|
|
|
- "lm_R_squared_L", "lm_R_squared_K", "lm_R_squared_r", "lm_R_squared_AUC",
|
|
|
- "lm_intercept_L", "lm_intercept_K", "lm_intercept_r", "lm_intercept_AUC",
|
|
|
- "lm_slope_L", "lm_slope_K", "lm_slope_r", "lm_slope_AUC"
|
|
|
+ "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
|
|
|
)))
|
|
|
|
|
|
full_data <- calculations_no_overlap %>%
|
|
|
- left_join(df_interactions, by = group_vars)
|
|
|
+ left_join(interactions, by = group_vars)
|
|
|
|
|
|
# Return final dataframes
|
|
|
return(list(
|
|
@@ -1239,7 +1275,7 @@ generate_rank_plot_configs <- function(df, is_lm = FALSE, filter_na = FALSE, ove
|
|
|
generate_correlation_plot_configs <- function(df, df_reference) {
|
|
|
# Define relationships for different-variable correlations
|
|
|
relationships <- list(
|
|
|
- list(x = "L", y = "K"),
|
|
|
+ list(x = "L", y = "K"), # x-var is predictor, y-var is reponse
|
|
|
list(x = "L", y = "r"),
|
|
|
list(x = "L", y = "AUC"),
|
|
|
list(x = "K", y = "r"),
|
|
@@ -1261,17 +1297,17 @@ generate_correlation_plot_configs <- function(df, df_reference) {
|
|
|
for (highlight_cyan in highlight_cyan_options) {
|
|
|
for (rel in relationships) {
|
|
|
# Extract relevant variable names for Z_lm values
|
|
|
- x_var <- paste0("Z_lm_", rel$x)
|
|
|
- y_var <- paste0("Z_lm_", rel$y)
|
|
|
+ x_var <- paste0("Z_lm_", rel$x) # predictor
|
|
|
+ y_var <- paste0("Z_lm_", rel$y) # response
|
|
|
|
|
|
# Find the max and min of both dataframes for printing linear regression line
|
|
|
xmin <- min(c(min(df[[x_var]], na.rm = TRUE), min(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
|
|
|
xmax <- max(c(max(df[[x_var]], na.rm = TRUE), max(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
|
|
|
|
|
|
# Extract the R-squared, intercept, and slope from the df (first value)
|
|
|
- intercept <- df[[paste0("lm_intercept_", rel$x)]][1]
|
|
|
- slope <- df[[paste0("lm_slope_", rel$x)]][1]
|
|
|
- r_squared <- df[[paste0("lm_R_squared_", rel$x)]][1]
|
|
|
+ intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1]
|
|
|
+ slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1]
|
|
|
+ r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1]
|
|
|
|
|
|
# Generate the label for the plot
|
|
|
plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
|