|
@@ -118,7 +118,7 @@ scale_colour_publication <- function(...) {
|
|
}
|
|
}
|
|
|
|
|
|
# Load the initial dataframe from the easy_results_file
|
|
# Load the initial dataframe from the easy_results_file
|
|
-load_and_process_data <- function(easy_results_file, sd = 3) {
|
|
|
|
|
|
+load_and_filter_data <- function(easy_results_file, sd = 3) {
|
|
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
|
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
|
|
|
|
|
df <- df %>%
|
|
df <- df %>%
|
|
@@ -656,7 +656,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
AUC = c(-6500, 6500)
|
|
AUC = c(-6500, 6500)
|
|
)
|
|
)
|
|
|
|
|
|
- df_filtered <- process_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
|
|
|
|
|
+ df_filtered <- filter_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
|
|
|
|
|
# Define annotation label functions
|
|
# Define annotation label functions
|
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
|
@@ -747,11 +747,34 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
return(configs)
|
|
return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
-generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, overlap_color = FALSE) {
|
|
|
|
|
|
+generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, overlap_color = FALSE, adjust = FALSE) {
|
|
|
|
|
|
sd_bands <- c(1, 2, 3)
|
|
sd_bands <- c(1, 2, 3)
|
|
|
|
+
|
|
|
|
+ avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
|
|
+ z_lm_cols <- paste0("Z_lm_", variables)
|
|
|
|
+ rank_avg_zscore_cols <- paste0("Rank_", variables)
|
|
|
|
+ rank_z_lm_cols <- paste0("Rank_lm_", variables)
|
|
|
|
|
|
configs <- list()
|
|
configs <- list()
|
|
|
|
+
|
|
|
|
+ if (adjust) {
|
|
|
|
+ message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
|
|
|
+ df <- df %>%
|
|
|
|
+ mutate(
|
|
|
|
+ across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
|
|
|
+ across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
|
|
|
+ )
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
|
|
|
+ rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
|
|
|
+ df_ranked <- df %>%
|
|
|
|
+ mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
|
|
|
+
|
|
|
|
+ rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
|
|
|
+ df_ranked <- df_ranked %>%
|
|
|
|
+ mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
|
|
|
|
|
# SD-based plots for L and K
|
|
# SD-based plots for L and K
|
|
for (variable in c("L", "K")) {
|
|
for (variable in c("L", "K")) {
|
|
@@ -768,12 +791,12 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|
|
|
|
|
for (sd_band in sd_bands) {
|
|
for (sd_band in sd_bands) {
|
|
|
|
|
|
- num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
|
|
- num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
|
|
|
|
|
+ num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
|
|
+ num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
|
|
|
|
|
# Annotated plot configuration
|
|
# Annotated plot configuration
|
|
configs[[length(configs) + 1]] <- list(
|
|
configs[[length(configs) + 1]] <- list(
|
|
- df = df_filtered,
|
|
|
|
|
|
+ df = df_ranked,
|
|
x_var = rank_var,
|
|
x_var = rank_var,
|
|
y_var = zscore_var,
|
|
y_var = zscore_var,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
@@ -785,14 +808,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|
alpha_negative = 0.3,
|
|
alpha_negative = 0.3,
|
|
annotations = list(
|
|
annotations = list(
|
|
list(
|
|
list(
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
|
|
+ x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
|
y = 10,
|
|
y = 10,
|
|
label = paste("Deletion Enhancers =", num_enhancers),
|
|
label = paste("Deletion Enhancers =", num_enhancers),
|
|
hjust = 0.5,
|
|
hjust = 0.5,
|
|
vjust = 1
|
|
vjust = 1
|
|
),
|
|
),
|
|
list(
|
|
list(
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
|
|
+ x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
|
y = -10,
|
|
y = -10,
|
|
label = paste("Deletion Suppressors =", num_suppressors),
|
|
label = paste("Deletion Suppressors =", num_suppressors),
|
|
hjust = 0.5,
|
|
hjust = 0.5,
|
|
@@ -808,7 +831,7 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|
|
|
|
|
# Non-Annotated Plot Configuration
|
|
# Non-Annotated Plot Configuration
|
|
configs[[length(configs) + 1]] <- list(
|
|
configs[[length(configs) + 1]] <- list(
|
|
- df = df_filtered,
|
|
|
|
|
|
+ df = df_ranked,
|
|
x_var = rank_var,
|
|
x_var = rank_var,
|
|
y_var = zscore_var,
|
|
y_var = zscore_var,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
@@ -849,30 +872,29 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|
rectangles <- NULL
|
|
rectangles <- NULL
|
|
}
|
|
}
|
|
|
|
|
|
- # Fit linear model
|
|
|
|
- lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_filtered)
|
|
|
|
- lm_summary <- summary(lm_model)
|
|
|
|
|
|
+ # Fit the linear model
|
|
|
|
+ lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
|
|
|
|
|
# Extract intercept and slope from the model coefficients
|
|
# Extract intercept and slope from the model coefficients
|
|
intercept <- coef(lm_model)[1]
|
|
intercept <- coef(lm_model)[1]
|
|
slope <- coef(lm_model)[2]
|
|
slope <- coef(lm_model)[2]
|
|
|
|
|
|
configs[[length(configs) + 1]] <- list(
|
|
configs[[length(configs) + 1]] <- list(
|
|
- df = df_filtered,
|
|
|
|
|
|
+ df = df_ranked,
|
|
x_var = x_var,
|
|
x_var = x_var,
|
|
y_var = y_var,
|
|
y_var = y_var,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
title = title,
|
|
title = title,
|
|
annotations = list(
|
|
annotations = list(
|
|
list(
|
|
list(
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
|
|
+ x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
|
y = 10,
|
|
y = 10,
|
|
label = paste("Deletion Enhancers =", num_enhancers),
|
|
label = paste("Deletion Enhancers =", num_enhancers),
|
|
hjust = 0.5,
|
|
hjust = 0.5,
|
|
vjust = 1
|
|
vjust = 1
|
|
),
|
|
),
|
|
list(
|
|
list(
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
|
|
+ x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
|
y = -10,
|
|
y = -10,
|
|
label = paste("Deletion Suppressors =", num_suppressors),
|
|
label = paste("Deletion Suppressors =", num_suppressors),
|
|
hjust = 0.5,
|
|
hjust = 0.5,
|
|
@@ -955,8 +977,7 @@ generate_correlation_plot_configs <- function(df) {
|
|
return(configs)
|
|
return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
-process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, adjust = FALSE,
|
|
|
|
- rank = FALSE, limits_map = NULL) {
|
|
|
|
|
|
+filter_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, limits_map = NULL) {
|
|
|
|
|
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
z_lm_cols <- paste0("Z_lm_", variables)
|
|
z_lm_cols <- paste0("Z_lm_", variables)
|
|
@@ -986,29 +1007,6 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- if (adjust) {
|
|
|
|
- message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
|
|
|
- df <- df %>%
|
|
|
|
- mutate(
|
|
|
|
- across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
|
|
|
- across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
|
|
|
- )
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Calculate and add rank columns
|
|
|
|
- # TODO probably should be moved to separate function
|
|
|
|
- if (rank) {
|
|
|
|
- message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
|
|
|
-
|
|
|
|
- rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
|
|
|
- df <- df %>%
|
|
|
|
- mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
|
|
|
-
|
|
|
|
- rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
|
|
|
- df <- df %>%
|
|
|
|
- mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
return(df)
|
|
return(df)
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1028,7 +1026,7 @@ main <- function() {
|
|
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
|
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
|
|
|
|
|
message("Loading and filtering data for experiment: ", exp_name)
|
|
message("Loading and filtering data for experiment: ", exp_name)
|
|
- df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>%
|
|
|
|
|
|
+ df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
|
update_gene_names(args$sgd_gene_list) %>%
|
|
update_gene_names(args$sgd_gene_list) %>%
|
|
as_tibble()
|
|
as_tibble()
|
|
|
|
|
|
@@ -1399,25 +1397,21 @@ main <- function() {
|
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
|
|
|
|
|
message("Generating rank plots")
|
|
message("Generating rank plots")
|
|
- # Formerly InteractionScores_AdjustMissing
|
|
|
|
- zscores_interactions_joined_ranked <- process_data(
|
|
|
|
- df = zscores_interactions_joined,
|
|
|
|
- variables = interaction_vars,
|
|
|
|
- adjust = TRUE,
|
|
|
|
- rank = TRUE)
|
|
|
|
rank_plot_configs <- generate_rank_plot_configs(
|
|
rank_plot_configs <- generate_rank_plot_configs(
|
|
- df = zscores_interactions_joined_ranked,
|
|
|
|
|
|
+ df = zscores_interactions_joined,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = FALSE
|
|
|
|
|
|
+ is_lm = FALSE,
|
|
|
|
+ adjust = TRUE
|
|
)
|
|
)
|
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
|
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
|
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
|
|
|
|
message("Generating ranked linear model plots")
|
|
message("Generating ranked linear model plots")
|
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
|
- df = zscores_interactions_joined_ranked,
|
|
|
|
|
|
+ df = zscores_interactions_joined,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = TRUE
|
|
|
|
|
|
+ is_lm = TRUE,
|
|
|
|
+ adjust = TRUE
|
|
)
|
|
)
|
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
|
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
|
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
@@ -1444,21 +1438,15 @@ main <- function() {
|
|
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
|
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
|
)
|
|
)
|
|
|
|
|
|
- # Re-rank
|
|
|
|
- zscores_interactions_filtered_ranked <- process_data(
|
|
|
|
- df = zscores_interactions_filtered,
|
|
|
|
- variables = interaction_vars,
|
|
|
|
- rank = TRUE
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
|
|
+ message("Generating filtered ranked plots")
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
df = zscores_interactions_filtered_ranked,
|
|
df = zscores_interactions_filtered_ranked,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
is_lm = FALSE,
|
|
is_lm = FALSE,
|
|
|
|
+ adjust = FALSE,
|
|
overlap_color = TRUE
|
|
overlap_color = TRUE
|
|
)
|
|
)
|
|
|
|
|
|
- message("Generating filtered ranked plots")
|
|
|
|
generate_and_save_plots(
|
|
generate_and_save_plots(
|
|
out_dir = out_dir,
|
|
out_dir = out_dir,
|
|
filename = "RankPlots_na_rm",
|
|
filename = "RankPlots_na_rm",
|
|
@@ -1470,6 +1458,7 @@ main <- function() {
|
|
df = zscores_interactions_filtered_ranked,
|
|
df = zscores_interactions_filtered_ranked,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
is_lm = TRUE,
|
|
is_lm = TRUE,
|
|
|
|
+ adjust = FALSE,
|
|
overlap_color = TRUE
|
|
overlap_color = TRUE
|
|
)
|
|
)
|
|
generate_and_save_plots(
|
|
generate_and_save_plots(
|