Move ranks into rank_plots function

This commit is contained in:
2024-09-25 01:57:14 -04:00
parent 0836dc70d2
commit fbbe351cbb

View File

@@ -118,7 +118,7 @@ scale_colour_publication <- function(...) {
}
# Load the initial dataframe from the easy_results_file
load_and_process_data <- function(easy_results_file, sd = 3) {
load_and_filter_data <- function(easy_results_file, sd = 3) {
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
df <- df %>%
@@ -656,7 +656,7 @@ generate_interaction_plot_configs <- function(df, variables) {
AUC = c(-6500, 6500)
)
df_filtered <- process_data(df, variables, filter_na = TRUE, limits_map = limits_map)
df_filtered <- filter_data(df, variables, filter_na = TRUE, limits_map = limits_map)
# Define annotation label functions
generate_annotation_labels <- function(df, var, annotation_name) {
@@ -747,11 +747,34 @@ generate_interaction_plot_configs <- function(df, variables) {
return(configs)
}
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, overlap_color = FALSE) {
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, overlap_color = FALSE) {
sd_bands <- c(1, 2, 3)
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
z_lm_cols <- paste0("Z_lm_", variables)
rank_avg_zscore_cols <- paste0("Rank_", variables)
rank_z_lm_cols <- paste0("Rank_lm_", variables)
configs <- list()
if (adjust) {
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
df <- df %>%
mutate(
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
)
}
message("Calculating ranks for Avg_Zscore and Z_lm columns")
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
df_ranked <- df %>%
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
df_ranked <- df_ranked %>%
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
# SD-based plots for L and K
for (variable in c("L", "K")) {
@@ -768,12 +791,12 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
for (sd_band in sd_bands) {
num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
# Annotated plot configuration
configs[[length(configs) + 1]] <- list(
df = df_filtered,
df = df_ranked,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
@@ -785,14 +808,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
alpha_negative = 0.3,
annotations = list(
list(
x = median(df_filtered[[rank_var]], na.rm = TRUE),
x = median(df_ranked[[rank_var]], na.rm = TRUE),
y = 10,
label = paste("Deletion Enhancers =", num_enhancers),
hjust = 0.5,
vjust = 1
),
list(
x = median(df_filtered[[rank_var]], na.rm = TRUE),
x = median(df_ranked[[rank_var]], na.rm = TRUE),
y = -10,
label = paste("Deletion Suppressors =", num_suppressors),
hjust = 0.5,
@@ -808,7 +831,7 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
# Non-Annotated Plot Configuration
configs[[length(configs) + 1]] <- list(
df = df_filtered,
df = df_ranked,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
@@ -849,30 +872,29 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov
rectangles <- NULL
}
# Fit linear model
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_filtered)
lm_summary <- summary(lm_model)
# Fit the linear model
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
# Extract intercept and slope from the model coefficients
intercept <- coef(lm_model)[1]
slope <- coef(lm_model)[2]
configs[[length(configs) + 1]] <- list(
df = df_filtered,
df = df_ranked,
x_var = x_var,
y_var = y_var,
plot_type = "scatter",
title = title,
annotations = list(
list(
x = median(df_filtered[[rank_var]], na.rm = TRUE),
x = median(df_ranked[[rank_var]], na.rm = TRUE),
y = 10,
label = paste("Deletion Enhancers =", num_enhancers),
hjust = 0.5,
vjust = 1
),
list(
x = median(df_filtered[[rank_var]], na.rm = TRUE),
x = median(df_ranked[[rank_var]], na.rm = TRUE),
y = -10,
label = paste("Deletion Suppressors =", num_suppressors),
hjust = 0.5,
@@ -955,8 +977,7 @@ generate_correlation_plot_configs <- function(df) {
return(configs)
}
process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, adjust = FALSE,
rank = FALSE, limits_map = NULL) {
filter_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, limits_map = NULL) {
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
z_lm_cols <- paste0("Z_lm_", variables)
@@ -986,29 +1007,6 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
}
}
if (adjust) {
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
df <- df %>%
mutate(
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
)
}
# Calculate and add rank columns
# TODO probably should be moved to separate function
if (rank) {
message("Calculating ranks for Avg_Zscore and Z_lm columns")
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
df <- df %>%
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
df <- df %>%
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
}
return(df)
}
@@ -1028,7 +1026,7 @@ main <- function() {
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
message("Loading and filtering data for experiment: ", exp_name)
df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>%
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
update_gene_names(args$sgd_gene_list) %>%
as_tibble()
@@ -1399,25 +1397,21 @@ main <- function() {
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
message("Generating rank plots")
# Formerly InteractionScores_AdjustMissing
zscores_interactions_joined_ranked <- process_data(
rank_plot_configs <- generate_rank_plot_configs(
df = zscores_interactions_joined,
variables = interaction_vars,
adjust = TRUE,
rank = TRUE)
rank_plot_configs <- generate_rank_plot_configs(
df = zscores_interactions_joined_ranked,
variables = interaction_vars,
is_lm = FALSE
is_lm = FALSE,
adjust = TRUE
)
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
message("Generating ranked linear model plots")
rank_lm_plot_configs <- generate_rank_plot_configs(
df = zscores_interactions_joined_ranked,
df = zscores_interactions_joined,
variables = interaction_vars,
is_lm = TRUE
is_lm = TRUE,
adjust = TRUE
)
generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
@@ -1444,21 +1438,15 @@ main <- function() {
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
)
# Re-rank
zscores_interactions_filtered_ranked <- process_data(
df = zscores_interactions_filtered,
variables = interaction_vars,
rank = TRUE
)
message("Generating filtered ranked plots")
rank_plot_filtered_configs <- generate_rank_plot_configs(
df = zscores_interactions_filtered_ranked,
variables = interaction_vars,
is_lm = FALSE,
adjust = FALSE,
overlap_color = TRUE
)
message("Generating filtered ranked plots")
generate_and_save_plots(
out_dir = out_dir,
filename = "RankPlots_na_rm",
@@ -1470,6 +1458,7 @@ main <- function() {
df = zscores_interactions_filtered_ranked,
variables = interaction_vars,
is_lm = TRUE,
adjust = FALSE,
overlap_color = TRUE
)
generate_and_save_plots(