Try separating interaction dfs

This commit is contained in:
2024-10-03 12:43:03 -04:00
parent f6958a0126
commit 4b273db78d

View File

@@ -160,10 +160,6 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
conc_num_factor_factor = as.factor(conc_num) conc_num_factor_factor = as.factor(conc_num)
) )
# Set the max concentration across the whole dataframe
df <- df %>%
mutate(max_conc = max(df$conc_num_factor, na.rm = TRUE))
return(df) return(df)
} }
@@ -215,7 +211,9 @@ calculate_summary_stats <- function(df, variables, group_vars) {
return(list(summary_stats = summary_stats, df_with_stats = df_joined)) return(list(summary_stats = summary_stats, df_with_stats = df_joined))
} }
calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overlap_threshold = 2) { calculate_interaction_scores <- function(df, df_bg, group_vars) {
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
# Include background statistics per concentration # Include background statistics per concentration
bg_stats <- df_bg %>% bg_stats <- df_bg %>%
@@ -382,45 +380,16 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla
R_Squared_r = first(R_Squared_r), R_Squared_r = first(R_Squared_r),
R_Squared_AUC = first(R_Squared_AUC), R_Squared_AUC = first(R_Squared_AUC),
# Include Drug
Drug = first(Drug),
.groups = "drop" .groups = "drop"
) )
# Create the final calculations and interactions dataframes with required columns # Return the dataframes without creating full_data
calculations_df <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor,
N, NG, DB, SM,
mean_L, mean_K, mean_r, mean_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
)
interactions_df <- interactions %>%
select(
all_of(group_vars),
NG, DB, SM,
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC
)
# Create full_data by joining calculations_df and interactions_df
full_data <- calculations_df %>%
left_join(interactions_df, by = group_vars, suffix = c("", "_interaction"))
# Return the dataframes
return(list( return(list(
calculations = calculations_df, calculations = calculations,
interactions = interactions_df, interactions = interactions
full_data = full_data
)) ))
} }
@@ -491,40 +460,29 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
"red" "red"
} }
y_mean_prefix <- if (!is.null(config$error_bar_params$y_mean_prefix)) {
config$error_bar_params$y_mean_prefix
} else {
"mean_"
}
y_mean_col <- paste0(y_mean_prefix, config$y_var)
# Dynamically set y_sd_col based on the provided prefix in error_bar_params
y_sd_prefix <- if (!is.null(config$error_bar_params$y_sd_prefix)) {
config$error_bar_params$y_sd_prefix
} else {
"sd_"
}
y_sd_col <- paste0(y_sd_prefix, config$y_var)
if (!is.null(config$error_bar_params$center_point)) {
plot <- plot + geom_point(aes(
x = .data[[config$x_var]],
y = first(.data[[y_mean_col]])),
color = error_bar_color,
shape = 16)
}
# Use error_bar_params if provided, otherwise calculate from mean and sd
if (!is.null(config$error_bar_params$ymin) && !is.null(config$error_bar_params$ymax)) { if (!is.null(config$error_bar_params$ymin) && !is.null(config$error_bar_params$ymax)) {
plot <- plot + geom_errorbar(aes( # Check if ymin and ymax are constants or column names
ymin = config$error_bar_params$ymin, if (is.numeric(config$error_bar_params$ymin) && is.numeric(config$error_bar_params$ymax)) {
ymax = config$error_bar_params$ymax), plot <- plot + geom_errorbar(aes(x = .data[[config$x_var]]),
color = error_bar_color) ymin = config$error_bar_params$ymin,
ymax = config$error_bar_params$ymax,
color = error_bar_color)
} else {
plot <- plot + geom_errorbar(aes(
x = .data[[config$x_var]],
ymin = .data[[config$error_bar_params$ymin]],
ymax = .data[[config$error_bar_params$ymax]]
), color = error_bar_color)
}
} else { } else {
# Original code for calculating from mean and sd
y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var)
plot <- plot + geom_errorbar(aes( plot <- plot + geom_errorbar(aes(
ymin = first(.data[[y_mean_col]]) - first(.data[[y_sd_col]]), x = .data[[config$x_var]],
ymax = first(.data[[y_mean_col]]) + first(.data[[y_sd_col]])), ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
color = error_bar_color) ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
), color = error_bar_color)
} }
} }
@@ -756,7 +714,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
return(list(plots = plot_configs)) return(list(plots = plot_configs))
} }
generate_interaction_plot_configs <- function(df, type) { generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) {
# Define the y-limits for the plots # Define the y-limits for the plots
limits_map <- list( limits_map <- list(
@@ -770,7 +728,7 @@ generate_interaction_plot_configs <- function(df, type) {
stats_boxplot_configs <- list() stats_boxplot_configs <- list()
delta_plot_configs <- list() delta_plot_configs <- list()
# Overall statistics plots # Overall statistics plots (use df)
OrfRep <- first(df$OrfRep) # this should correspond to the reference strain OrfRep <- first(df$OrfRep) # this should correspond to the reference strain
for (plot_type in c("scatter", "box")) { for (plot_type in c("scatter", "box")) {
@@ -804,23 +762,23 @@ generate_interaction_plot_configs <- function(df, type) {
) )
plot_config$position <- "jitter" plot_config$position <- "jitter"
annotations <- list( annotations <- list(
list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="), list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
list(x = 0.25, y = y_limits[1], label = "SM =") list(x = 0.25, y = y_limits[1], label = "SM =")
) )
# Loop over unique x values and add NG, DB, SM values at calculated y positions # Loop over unique x values and add NG, DB, SM values at calculated y positions
for (x_val in unique(df$conc_num_factor_factor)) { for (x_val in unique(df$conc_num_factor_factor)) {
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
annotations <- append(annotations, list( annotations <- append(annotations, list(
list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)), list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = sum(current_df$NG, na.rm = TRUE)),
list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)), list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = sum(current_df$DB, na.rm = TRUE)),
list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0)) list(x = x_val, y = y_limits[1], label = sum(current_df$SM, na.rm = TRUE))
)) ))
} }
plot_config$annotations <- annotations plot_config$annotations <- annotations
# Append to scatter plot configurations # Append to scatter plot configurations
stats_plot_configs <- append(stats_plot_configs, list(plot_config)) stats_plot_configs <- append(stats_plot_configs, list(plot_config))
@@ -836,7 +794,7 @@ generate_interaction_plot_configs <- function(df, type) {
} }
} }
# Delta interaction plots # Delta interaction plots (use df_calculations and df_interactions)
if (type == "reference") { if (type == "reference") {
group_vars <- c("OrfRep", "Gene", "num") group_vars <- c("OrfRep", "Gene", "num")
} else if (type == "deletion") { } else if (type == "deletion") {
@@ -850,7 +808,7 @@ generate_interaction_plot_configs <- function(df, type) {
AUC = c(-6000, 6000) AUC = c(-6000, 6000)
) )
grouped_data <- df %>% grouped_data <- df_calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
group_split() group_split()
@@ -865,6 +823,11 @@ generate_interaction_plot_configs <- function(df, type) {
OrfRepTitle <- OrfRep OrfRepTitle <- OrfRep
} }
# Get corresponding interaction row
interaction_row <- df_interactions %>%
filter(if_all(all_of(group_vars), ~ . == first(.))) %>%
slice(1)
for (var in names(delta_limits_map)) { for (var in names(delta_limits_map)) {
y_limits <- delta_limits_map[[var]] y_limits <- delta_limits_map[[var]]
y_span <- y_limits[2] - y_limits[1] y_span <- y_limits[2] - y_limits[1]
@@ -872,17 +835,17 @@ generate_interaction_plot_configs <- function(df, type) {
# Error bars # Error bars
WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0) WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
# Z_Shift and lm values # Z_Shift and lm values from interaction_row
Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2) Z_Shift_value <- round(first(interaction_row[[paste0("Z_Shift_", var)]], default = 0), 2)
Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2) Z_lm_value <- round(first(interaction_row[[paste0("Z_lm_", var)]], default = 0), 2)
R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2) R_squared_value <- round(first(interaction_row[[paste0("R_Squared_", var)]], default = 0), 2)
# NG, DB, SM values # NG, DB, SM values from interaction_row
NG_value <- first(group_data$NG, default = 0) NG_value <- first(interaction_row$NG, default = 0)
DB_value <- first(group_data$DB, default = 0) DB_value <- first(interaction_row$DB, default = 0)
SM_value <- first(group_data$SM, default = 0) SM_value <- first(interaction_row$SM, default = 0)
# Use the pre-calculated lm intercept and slope from the dataframe # Use the pre-calculated lm intercept and slope from group_data
lm_intercept_col <- paste0("lm_intercept_", var) lm_intercept_col <- paste0("lm_intercept_", var)
lm_slope_col <- paste0("lm_slope_", var) lm_slope_col <- paste0("lm_slope_", var)
lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0) lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
@@ -906,8 +869,9 @@ generate_interaction_plot_configs <- function(df, type) {
), ),
error_bar = TRUE, error_bar = TRUE,
error_bar_params = list( error_bar_params = list(
ymin = 0 - (2 * WT_sd_value), # Passing constants directly
ymax = 0 + (2 * WT_sd_value), ymin = -2 * WT_sd_value,
ymax = 2 * WT_sd_value,
color = "black" color = "black"
), ),
smooth = TRUE, smooth = TRUE,
@@ -931,34 +895,36 @@ generate_interaction_plot_configs <- function(df, type) {
return(list( return(list(
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs), list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs),
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs), list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs),
list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs) list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = delta_plot_configs)
)) ))
} }
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) { generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
sd_bands <- c(1, 2, 3)
plot_configs <- list()
variables <- c("L", "K") plot_configs <- list()
sd_bands <- c(1, 2, 3)
# Adjust (if necessary) and rank columns # Adjust (if necessary) and rank columns
variables <- c("L", "K")
for (variable in variables) { for (variable in variables) {
if (adjust) { if (adjust) {
df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]]) df_interactions[[paste0("Avg_Zscore_", variable)]] <-
df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]]) ifelse(is.na(df_interactions[[paste0("Avg_Zscore_", variable)]]), 0.001, df_interactions[[paste0("Avg_Zscore_", variable)]])
df_interactions[[paste0("Z_lm_", variable)]] <-
ifelse(is.na(df_interactions[[paste0("Z_lm_", variable)]]), 0.001, df_interactions[[paste0("Z_lm_", variable)]])
} }
df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep") df_interactions[[paste0("Rank_", variable)]] <- rank(df_interactions[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep") df_interactions[[paste0("Rank_lm_", variable)]] <- rank(df_interactions[[paste0("Z_lm_", variable)]], na.last = "keep")
} }
# Helper function to create a plot configuration # Helper function to create a plot configuration
create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) { create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) {
num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE) num_enhancers <- sum(df_interactions[[zscore_var]] >= sd_band, na.rm = TRUE)
num_suppressors <- sum(df[[zscore_var]] <= -sd_band, na.rm = TRUE) num_suppressors <- sum(df_interactions[[zscore_var]] <= -sd_band, na.rm = TRUE)
# Default plot config # Default plot config
plot_config <- list( plot_config <- list(
df = df, df = df_interactions,
x_var = rank_var, x_var = rank_var,
y_var = zscore_var, y_var = zscore_var,
plot_type = "scatter", plot_type = "scatter",
@@ -980,13 +946,13 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
# Add specific annotations for plots with annotations # Add specific annotations for plots with annotations
plot_config$annotations <- list( plot_config$annotations <- list(
list( list(
x = median(df[[rank_var]], na.rm = TRUE), x = median(df_interactions[[rank_var]], na.rm = TRUE),
y = max(df[[zscore_var]], na.rm = TRUE) * 0.9, y = max(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
label = paste("Deletion Enhancers =", num_enhancers) label = paste("Deletion Enhancers =", num_enhancers)
), ),
list( list(
x = median(df[[rank_var]], na.rm = TRUE), x = median(df_interactions[[rank_var]], na.rm = TRUE),
y = min(df[[zscore_var]], na.rm = TRUE) * 0.9, y = min(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
label = paste("Deletion Suppressors =", num_suppressors) label = paste("Deletion Suppressors =", num_suppressors)
) )
) )
@@ -1019,7 +985,7 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs)) return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
} }
generate_correlation_plot_configs <- function(df, correlation_stats) { generate_correlation_plot_configs <- function(df_interactions) {
# Define relationships for different-variable correlations # Define relationships for different-variable correlations
relationships <- list( relationships <- list(
list(x = "L", y = "K"), list(x = "L", y = "K"),
@@ -1030,6 +996,23 @@ generate_correlation_plot_configs <- function(df, correlation_stats) {
list(x = "r", y = "AUC") list(x = "r", y = "AUC")
) )
correlation_stats <- list()
for (rel in relationships) {
x_var <- paste0("Z_lm_", rel$x)
y_var <- paste0("Z_lm_", rel$y)
lm_fit <- lm(df_interactions[[y_var]] ~ df_interactions[[x_var]])
intercept <- coef(lm_fit)[1]
slope <- coef(lm_fit)[2]
r_squared <- summary(lm_fit)$r.squared
relationship_name <- paste0(rel$x, "_vs_", rel$y)
correlation_stats[[relationship_name]] <- list(
intercept = intercept,
slope = slope,
r_squared = r_squared
)
}
plot_configs <- list() plot_configs <- list()
# Iterate over the option to highlight cyan points (TRUE/FALSE) # Iterate over the option to highlight cyan points (TRUE/FALSE)
@@ -1053,15 +1036,15 @@ generate_correlation_plot_configs <- function(df, correlation_stats) {
# Construct plot config # Construct plot config
plot_config <- list( plot_config <- list(
df = df, df = df_interactions,
x_var = x_var, x_var = x_var,
y_var = y_var, y_var = y_var,
plot_type = "scatter", plot_type = "scatter",
title = plot_label, title = plot_label,
annotations = list( annotations = list(
list( list(
x = mean(df[[x_var]], na.rm = TRUE), x = mean(df_interactions[[x_var]], na.rm = TRUE),
y = mean(df[[y_var]], na.rm = TRUE), y = mean(df_interactions[[y_var]], na.rm = TRUE),
label = paste("R-squared =", round(r_squared, 3)) label = paste("R-squared =", round(r_squared, 3))
) )
), ),
@@ -1371,9 +1354,8 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "num", "conc_num") group_vars = c("OrfRep", "Gene", "num", "conc_num")
)$df_with_stats )$df_with_stats
reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num")) reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num"))
zscore_calculations_reference <- reference_results$calculations df_calculations_reference <- reference_results$calculations
zscore_interactions_reference <- reference_results$interactions df_interactions_reference <- reference_results$interactions
zscore_interactions_reference_joined <- reference_results$full_data
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2
@@ -1394,38 +1376,39 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "conc_num") group_vars = c("OrfRep", "Gene", "conc_num")
)$df_with_stats )$df_with_stats
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene")) deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene"))
zscore_calculations <- deletion_results$calculations df_calculations <- deletion_results$calculations
zscore_interactions <- deletion_results$interactions df_interactions <- deletion_results$interactions
zscore_interactions_joined <- deletion_results$full_data
# Writing Z-Scores to file # Writing Z-Scores to file
write.csv(zscore_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(zscore_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
write.csv(zscore_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(zscore_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
# Create interaction plots # Create interaction plots
message("Generating reference interaction plots") message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, "reference") reference_plot_configs <- generate_interaction_plot_configs(
df_reference_stats, df_calculations_reference, df_interactions_reference, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
message("Generating deletion interaction plots") message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, "deletion") deletion_plot_configs <- generate_interaction_plot_configs(
df_deletion_stats, df_calculations, df_interactions, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
# Define conditions for enhancers and suppressors # Define conditions for enhancers and suppressors
# TODO Add to study config? # TODO Add to study config?
threshold <- 2 threshold <- 2
enhancer_condition_L <- zscore_interactions$Avg_Zscore_L >= threshold enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold
suppressor_condition_L <- zscore_interactions$Avg_Zscore_L <= -threshold suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold
enhancer_condition_K <- zscore_interactions$Avg_Zscore_K >= threshold enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold
suppressor_condition_K <- zscore_interactions$Avg_Zscore_K <= -threshold suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold
# Subset data # Subset data
enhancers_L <- zscore_interactions[enhancer_condition_L, ] enhancers_L <- df_interactions[enhancer_condition_L, ]
suppressors_L <- zscore_interactions[suppressor_condition_L, ] suppressors_L <- df_interactions[suppressor_condition_L, ]
enhancers_K <- zscore_interactions[enhancer_condition_K, ] enhancers_K <- df_interactions[enhancer_condition_K, ]
suppressors_K <- zscore_interactions[suppressor_condition_K, ] suppressors_K <- df_interactions[suppressor_condition_K, ]
# Save enhancers and suppressors # Save enhancers and suppressors
message("Writing enhancer/suppressor csv files") message("Writing enhancer/suppressor csv files")
@@ -1435,8 +1418,8 @@ main <- function() {
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
# Combine conditions for enhancers and suppressors # Combine conditions for enhancers and suppressors
enhancers_and_suppressors_L <- zscore_interactions[enhancer_condition_L | suppressor_condition_L, ] enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- zscore_interactions[enhancer_condition_K | suppressor_condition_K, ] enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
# Save combined enhancers and suppressors # Save combined enhancers and suppressors
write.csv(enhancers_and_suppressors_L, write.csv(enhancers_and_suppressors_L,
@@ -1446,10 +1429,10 @@ main <- function() {
# Handle linear model based enhancers and suppressors # Handle linear model based enhancers and suppressors
lm_threshold <- 2 # TODO add to study config? lm_threshold <- 2 # TODO add to study config?
enhancers_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L >= lm_threshold, ] enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ]
suppressors_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L <= -lm_threshold, ] suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ]
enhancers_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K >= lm_threshold, ] enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ]
suppressors_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K <= -lm_threshold, ] suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ]
# Save linear model based enhancers and suppressors # Save linear model based enhancers and suppressors
message("Writing linear model enhancer/suppressor csv files") message("Writing linear model enhancer/suppressor csv files")
@@ -1464,7 +1447,7 @@ main <- function() {
message("Generating rank plots") message("Generating rank plots")
rank_plot_configs <- generate_rank_plot_configs( rank_plot_configs <- generate_rank_plot_configs(
df = zscore_interactions_joined, df_interactions,
is_lm = FALSE, is_lm = FALSE,
adjust = TRUE adjust = TRUE
) )
@@ -1473,16 +1456,37 @@ main <- function() {
message("Generating ranked linear model plots") message("Generating ranked linear model plots")
rank_lm_plot_configs <- generate_rank_plot_configs( rank_lm_plot_configs <- generate_rank_plot_configs(
df = zscore_interactions_joined, df_interactions,
is_lm = TRUE, is_lm = TRUE,
adjust = TRUE adjust = TRUE
) )
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm", generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
plot_configs = rank_lm_plot_configs) plot_configs = rank_lm_plot_configs)
overlap_threshold <- 2
df_interactions_filtered <- df_interactions %>%
filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
mutate(
Overlap = case_when(
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
TRUE ~ "No Effect"
),
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
)
message("Generating filtered ranked plots") message("Generating filtered ranked plots")
rank_plot_filtered_configs <- generate_rank_plot_configs( rank_plot_filtered_configs <- generate_rank_plot_configs(
df = zscore_interactions_filtered, df_interactions_filtered,
is_lm = FALSE, is_lm = FALSE,
adjust = FALSE, adjust = FALSE,
overlap_color = TRUE overlap_color = TRUE
@@ -1494,7 +1498,7 @@ main <- function() {
message("Generating filtered ranked linear model plots") message("Generating filtered ranked linear model plots")
rank_plot_lm_filtered_configs <- generate_rank_plot_configs( rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
df = zscore_interactions_filtered, df_interactions_filtered,
is_lm = TRUE, is_lm = TRUE,
adjust = FALSE, adjust = FALSE,
overlap_color = TRUE overlap_color = TRUE
@@ -1505,7 +1509,9 @@ main <- function() {
plot_configs = rank_plot_lm_filtered_configs) plot_configs = rank_plot_lm_filtered_configs)
message("Generating correlation curve parameter pair plots") message("Generating correlation curve parameter pair plots")
correlation_plot_configs <- generate_correlation_plot_configs(zscore_interactions_filtered) correlation_plot_configs <- generate_correlation_plot_configs(
df_interactions_filtered
)
generate_and_save_plots( generate_and_save_plots(
out_dir = out_dir, out_dir = out_dir,
filename = "correlation_cpps", filename = "correlation_cpps",