Move ranks into rank_plots function
This commit is contained in:
@@ -118,7 +118,7 @@ scale_colour_publication <- function(...) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Load the initial dataframe from the easy_results_file
|
# Load the initial dataframe from the easy_results_file
|
||||||
load_and_process_data <- function(easy_results_file, sd = 3) {
|
load_and_filter_data <- function(easy_results_file, sd = 3) {
|
||||||
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||||
|
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
@@ -656,7 +656,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
AUC = c(-6500, 6500)
|
AUC = c(-6500, 6500)
|
||||||
)
|
)
|
||||||
|
|
||||||
df_filtered <- process_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
df_filtered <- filter_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
||||||
|
|
||||||
# Define annotation label functions
|
# Define annotation label functions
|
||||||
generate_annotation_labels <- function(df, var, annotation_name) {
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
||||||
@@ -747,11 +747,34 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
return(configs)
|
return(configs)
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, overlap_color = FALSE) {
|
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, overlap_color = FALSE) {
|
||||||
|
|
||||||
sd_bands <- c(1, 2, 3)
|
sd_bands <- c(1, 2, 3)
|
||||||
|
|
||||||
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||||
|
z_lm_cols <- paste0("Z_lm_", variables)
|
||||||
|
rank_avg_zscore_cols <- paste0("Rank_", variables)
|
||||||
|
rank_z_lm_cols <- paste0("Rank_lm_", variables)
|
||||||
|
|
||||||
configs <- list()
|
configs <- list()
|
||||||
|
|
||||||
|
if (adjust) {
|
||||||
|
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
||||||
|
df <- df %>%
|
||||||
|
mutate(
|
||||||
|
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
||||||
|
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
||||||
|
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
||||||
|
df_ranked <- df %>%
|
||||||
|
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
||||||
|
|
||||||
|
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
||||||
|
df_ranked <- df_ranked %>%
|
||||||
|
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
||||||
|
|
||||||
# SD-based plots for L and K
|
# SD-based plots for L and K
|
||||||
for (variable in c("L", "K")) {
|
for (variable in c("L", "K")) {
|
||||||
@@ -768,12 +791,12 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|||||||
|
|
||||||
for (sd_band in sd_bands) {
|
for (sd_band in sd_bands) {
|
||||||
|
|
||||||
num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||||
num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||||
|
|
||||||
# Annotated plot configuration
|
# Annotated plot configuration
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_filtered,
|
df = df_ranked,
|
||||||
x_var = rank_var,
|
x_var = rank_var,
|
||||||
y_var = zscore_var,
|
y_var = zscore_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
@@ -785,14 +808,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|||||||
alpha_negative = 0.3,
|
alpha_negative = 0.3,
|
||||||
annotations = list(
|
annotations = list(
|
||||||
list(
|
list(
|
||||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = 10,
|
y = 10,
|
||||||
label = paste("Deletion Enhancers =", num_enhancers),
|
label = paste("Deletion Enhancers =", num_enhancers),
|
||||||
hjust = 0.5,
|
hjust = 0.5,
|
||||||
vjust = 1
|
vjust = 1
|
||||||
),
|
),
|
||||||
list(
|
list(
|
||||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = -10,
|
y = -10,
|
||||||
label = paste("Deletion Suppressors =", num_suppressors),
|
label = paste("Deletion Suppressors =", num_suppressors),
|
||||||
hjust = 0.5,
|
hjust = 0.5,
|
||||||
@@ -808,7 +831,7 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|||||||
|
|
||||||
# Non-Annotated Plot Configuration
|
# Non-Annotated Plot Configuration
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_filtered,
|
df = df_ranked,
|
||||||
x_var = rank_var,
|
x_var = rank_var,
|
||||||
y_var = zscore_var,
|
y_var = zscore_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
@@ -849,30 +872,29 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
|||||||
rectangles <- NULL
|
rectangles <- NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
# Fit linear model
|
# Fit the linear model
|
||||||
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_filtered)
|
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
||||||
lm_summary <- summary(lm_model)
|
|
||||||
|
|
||||||
# Extract intercept and slope from the model coefficients
|
# Extract intercept and slope from the model coefficients
|
||||||
intercept <- coef(lm_model)[1]
|
intercept <- coef(lm_model)[1]
|
||||||
slope <- coef(lm_model)[2]
|
slope <- coef(lm_model)[2]
|
||||||
|
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_filtered,
|
df = df_ranked,
|
||||||
x_var = x_var,
|
x_var = x_var,
|
||||||
y_var = y_var,
|
y_var = y_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = title,
|
title = title,
|
||||||
annotations = list(
|
annotations = list(
|
||||||
list(
|
list(
|
||||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = 10,
|
y = 10,
|
||||||
label = paste("Deletion Enhancers =", num_enhancers),
|
label = paste("Deletion Enhancers =", num_enhancers),
|
||||||
hjust = 0.5,
|
hjust = 0.5,
|
||||||
vjust = 1
|
vjust = 1
|
||||||
),
|
),
|
||||||
list(
|
list(
|
||||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = -10,
|
y = -10,
|
||||||
label = paste("Deletion Suppressors =", num_suppressors),
|
label = paste("Deletion Suppressors =", num_suppressors),
|
||||||
hjust = 0.5,
|
hjust = 0.5,
|
||||||
@@ -955,8 +977,7 @@ generate_correlation_plot_configs <- function(df) {
|
|||||||
return(configs)
|
return(configs)
|
||||||
}
|
}
|
||||||
|
|
||||||
process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, adjust = FALSE,
|
filter_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, limits_map = NULL) {
|
||||||
rank = FALSE, limits_map = NULL) {
|
|
||||||
|
|
||||||
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||||
z_lm_cols <- paste0("Z_lm_", variables)
|
z_lm_cols <- paste0("Z_lm_", variables)
|
||||||
@@ -986,29 +1007,6 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (adjust) {
|
|
||||||
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
|
||||||
df <- df %>%
|
|
||||||
mutate(
|
|
||||||
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
|
||||||
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Calculate and add rank columns
|
|
||||||
# TODO probably should be moved to separate function
|
|
||||||
if (rank) {
|
|
||||||
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
|
||||||
|
|
||||||
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
|
||||||
df <- df %>%
|
|
||||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
|
||||||
|
|
||||||
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
|
||||||
df <- df %>%
|
|
||||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
|
||||||
}
|
|
||||||
|
|
||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1028,7 +1026,7 @@ main <- function() {
|
|||||||
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
||||||
|
|
||||||
message("Loading and filtering data for experiment: ", exp_name)
|
message("Loading and filtering data for experiment: ", exp_name)
|
||||||
df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>%
|
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
||||||
update_gene_names(args$sgd_gene_list) %>%
|
update_gene_names(args$sgd_gene_list) %>%
|
||||||
as_tibble()
|
as_tibble()
|
||||||
|
|
||||||
@@ -1399,25 +1397,21 @@ main <- function() {
|
|||||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Generating rank plots")
|
message("Generating rank plots")
|
||||||
# Formerly InteractionScores_AdjustMissing
|
rank_plot_configs <- generate_rank_plot_configs(
|
||||||
zscores_interactions_joined_ranked <- process_data(
|
|
||||||
df = zscores_interactions_joined,
|
df = zscores_interactions_joined,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
adjust = TRUE,
|
is_lm = FALSE,
|
||||||
rank = TRUE)
|
adjust = TRUE
|
||||||
rank_plot_configs <- generate_rank_plot_configs(
|
|
||||||
df = zscores_interactions_joined_ranked,
|
|
||||||
variables = interaction_vars,
|
|
||||||
is_lm = FALSE
|
|
||||||
)
|
)
|
||||||
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
|
||||||
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
|
|
||||||
message("Generating ranked linear model plots")
|
message("Generating ranked linear model plots")
|
||||||
rank_lm_plot_configs <- generate_rank_plot_configs(
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||||
df = zscores_interactions_joined_ranked,
|
df = zscores_interactions_joined,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
is_lm = TRUE
|
is_lm = TRUE,
|
||||||
|
adjust = TRUE
|
||||||
)
|
)
|
||||||
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
|
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
|
||||||
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
@@ -1444,21 +1438,15 @@ main <- function() {
|
|||||||
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
||||||
)
|
)
|
||||||
|
|
||||||
# Re-rank
|
message("Generating filtered ranked plots")
|
||||||
zscores_interactions_filtered_ranked <- process_data(
|
|
||||||
df = zscores_interactions_filtered,
|
|
||||||
variables = interaction_vars,
|
|
||||||
rank = TRUE
|
|
||||||
)
|
|
||||||
|
|
||||||
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||||
df = zscores_interactions_filtered_ranked,
|
df = zscores_interactions_filtered_ranked,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
is_lm = FALSE,
|
is_lm = FALSE,
|
||||||
|
adjust = FALSE,
|
||||||
overlap_color = TRUE
|
overlap_color = TRUE
|
||||||
)
|
)
|
||||||
|
|
||||||
message("Generating filtered ranked plots")
|
|
||||||
generate_and_save_plots(
|
generate_and_save_plots(
|
||||||
out_dir = out_dir,
|
out_dir = out_dir,
|
||||||
filename = "RankPlots_na_rm",
|
filename = "RankPlots_na_rm",
|
||||||
@@ -1470,6 +1458,7 @@ main <- function() {
|
|||||||
df = zscores_interactions_filtered_ranked,
|
df = zscores_interactions_filtered_ranked,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
is_lm = TRUE,
|
is_lm = TRUE,
|
||||||
|
adjust = FALSE,
|
||||||
overlap_color = TRUE
|
overlap_color = TRUE
|
||||||
)
|
)
|
||||||
generate_and_save_plots(
|
generate_and_save_plots(
|
||||||
|
|||||||
Reference in New Issue
Block a user