Move ranks into rank_plots function
This commit is contained in:
@@ -118,7 +118,7 @@ scale_colour_publication <- function(...) {
|
||||
}
|
||||
|
||||
# Load the initial dataframe from the easy_results_file
|
||||
load_and_process_data <- function(easy_results_file, sd = 3) {
|
||||
load_and_filter_data <- function(easy_results_file, sd = 3) {
|
||||
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||
|
||||
df <- df %>%
|
||||
@@ -656,7 +656,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
||||
AUC = c(-6500, 6500)
|
||||
)
|
||||
|
||||
df_filtered <- process_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
||||
df_filtered <- filter_data(df, variables, filter_na = TRUE, limits_map = limits_map)
|
||||
|
||||
# Define annotation label functions
|
||||
generate_annotation_labels <- function(df, var, annotation_name) {
|
||||
@@ -747,11 +747,34 @@ generate_interaction_plot_configs <- function(df, variables) {
|
||||
return(configs)
|
||||
}
|
||||
|
||||
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, overlap_color = FALSE) {
|
||||
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, overlap_color = FALSE) {
|
||||
|
||||
sd_bands <- c(1, 2, 3)
|
||||
|
||||
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||
z_lm_cols <- paste0("Z_lm_", variables)
|
||||
rank_avg_zscore_cols <- paste0("Rank_", variables)
|
||||
rank_z_lm_cols <- paste0("Rank_lm_", variables)
|
||||
|
||||
configs <- list()
|
||||
|
||||
if (adjust) {
|
||||
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
||||
df <- df %>%
|
||||
mutate(
|
||||
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
||||
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
||||
)
|
||||
}
|
||||
|
||||
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
||||
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
||||
df_ranked <- df %>%
|
||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
||||
|
||||
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
||||
df_ranked <- df_ranked %>%
|
||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
||||
|
||||
# SD-based plots for L and K
|
||||
for (variable in c("L", "K")) {
|
||||
@@ -768,12 +791,12 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
||||
|
||||
for (sd_band in sd_bands) {
|
||||
|
||||
num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||
num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||
|
||||
# Annotated plot configuration
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_filtered,
|
||||
df = df_ranked,
|
||||
x_var = rank_var,
|
||||
y_var = zscore_var,
|
||||
plot_type = "scatter",
|
||||
@@ -785,14 +808,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
||||
alpha_negative = 0.3,
|
||||
annotations = list(
|
||||
list(
|
||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = 10,
|
||||
label = paste("Deletion Enhancers =", num_enhancers),
|
||||
hjust = 0.5,
|
||||
vjust = 1
|
||||
),
|
||||
list(
|
||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = -10,
|
||||
label = paste("Deletion Suppressors =", num_suppressors),
|
||||
hjust = 0.5,
|
||||
@@ -808,7 +831,7 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
||||
|
||||
# Non-Annotated Plot Configuration
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_filtered,
|
||||
df = df_ranked,
|
||||
x_var = rank_var,
|
||||
y_var = zscore_var,
|
||||
plot_type = "scatter",
|
||||
@@ -849,30 +872,29 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
|
||||
rectangles <- NULL
|
||||
}
|
||||
|
||||
# Fit linear model
|
||||
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_filtered)
|
||||
lm_summary <- summary(lm_model)
|
||||
# Fit the linear model
|
||||
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
||||
|
||||
# Extract intercept and slope from the model coefficients
|
||||
intercept <- coef(lm_model)[1]
|
||||
slope <- coef(lm_model)[2]
|
||||
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_filtered,
|
||||
df = df_ranked,
|
||||
x_var = x_var,
|
||||
y_var = y_var,
|
||||
plot_type = "scatter",
|
||||
title = title,
|
||||
annotations = list(
|
||||
list(
|
||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = 10,
|
||||
label = paste("Deletion Enhancers =", num_enhancers),
|
||||
hjust = 0.5,
|
||||
vjust = 1
|
||||
),
|
||||
list(
|
||||
x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = -10,
|
||||
label = paste("Deletion Suppressors =", num_suppressors),
|
||||
hjust = 0.5,
|
||||
@@ -955,8 +977,7 @@ generate_correlation_plot_configs <- function(df) {
|
||||
return(configs)
|
||||
}
|
||||
|
||||
process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, adjust = FALSE,
|
||||
rank = FALSE, limits_map = NULL) {
|
||||
filter_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, limits_map = NULL) {
|
||||
|
||||
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||
z_lm_cols <- paste0("Z_lm_", variables)
|
||||
@@ -986,29 +1007,6 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
|
||||
}
|
||||
}
|
||||
|
||||
if (adjust) {
|
||||
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
||||
df <- df %>%
|
||||
mutate(
|
||||
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
||||
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
|
||||
)
|
||||
}
|
||||
|
||||
# Calculate and add rank columns
|
||||
# TODO probably should be moved to separate function
|
||||
if (rank) {
|
||||
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
||||
|
||||
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
||||
df <- df %>%
|
||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
||||
|
||||
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
||||
df <- df %>%
|
||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
||||
}
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
@@ -1028,7 +1026,7 @@ main <- function() {
|
||||
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
||||
|
||||
message("Loading and filtering data for experiment: ", exp_name)
|
||||
df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>%
|
||||
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
||||
update_gene_names(args$sgd_gene_list) %>%
|
||||
as_tibble()
|
||||
|
||||
@@ -1399,25 +1397,21 @@ main <- function() {
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
||||
|
||||
message("Generating rank plots")
|
||||
# Formerly InteractionScores_AdjustMissing
|
||||
zscores_interactions_joined_ranked <- process_data(
|
||||
rank_plot_configs <- generate_rank_plot_configs(
|
||||
df = zscores_interactions_joined,
|
||||
variables = interaction_vars,
|
||||
adjust = TRUE,
|
||||
rank = TRUE)
|
||||
rank_plot_configs <- generate_rank_plot_configs(
|
||||
df = zscores_interactions_joined_ranked,
|
||||
variables = interaction_vars,
|
||||
is_lm = FALSE
|
||||
is_lm = FALSE,
|
||||
adjust = TRUE
|
||||
)
|
||||
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
|
||||
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||
|
||||
message("Generating ranked linear model plots")
|
||||
rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||
df = zscores_interactions_joined_ranked,
|
||||
df = zscores_interactions_joined,
|
||||
variables = interaction_vars,
|
||||
is_lm = TRUE
|
||||
is_lm = TRUE,
|
||||
adjust = TRUE
|
||||
)
|
||||
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
|
||||
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||
@@ -1444,21 +1438,15 @@ main <- function() {
|
||||
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
||||
)
|
||||
|
||||
# Re-rank
|
||||
zscores_interactions_filtered_ranked <- process_data(
|
||||
df = zscores_interactions_filtered,
|
||||
variables = interaction_vars,
|
||||
rank = TRUE
|
||||
)
|
||||
|
||||
message("Generating filtered ranked plots")
|
||||
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||
df = zscores_interactions_filtered_ranked,
|
||||
variables = interaction_vars,
|
||||
is_lm = FALSE,
|
||||
adjust = FALSE,
|
||||
overlap_color = TRUE
|
||||
)
|
||||
|
||||
message("Generating filtered ranked plots")
|
||||
generate_and_save_plots(
|
||||
out_dir = out_dir,
|
||||
filename = "RankPlots_na_rm",
|
||||
@@ -1470,6 +1458,7 @@ main <- function() {
|
||||
df = zscores_interactions_filtered_ranked,
|
||||
variables = interaction_vars,
|
||||
is_lm = TRUE,
|
||||
adjust = FALSE,
|
||||
overlap_color = TRUE
|
||||
)
|
||||
generate_and_save_plots(
|
||||
|
||||
Reference in New Issue
Block a user