Refactor rank plots

This commit is contained in:
2024-09-15 19:32:25 -04:00
parent eb2e1ae7d6
commit e00df4be45

View File

@@ -567,37 +567,6 @@ generate_box_plot <- function(plot, config) {
return(plot)
}
# Adjust missing values and calculate ranks
adjust_missing_and_rank <- function(df, variables) {
# Loop over each variable
for (var in variables) {
# Construct column names
avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var)
# Check if the columns exist in the data frame
if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
# Adjust missing values by replacing NA with 0.001
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
# Compute ranks and create new columns
df[[rank_col]] <- rank(df[[avg_zscore_col]])
df[[rank_lm_col]] <- rank(df[[z_lm_col]])
} else {
warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in data frame"))
}
# Print some debugging output
print(df %>% select("OrfRep", "Gene", "conc_num", "conc_num_factor",
avg_zscore_col, z_lm_col, rank_col, rank_lm_col))
}
return(df)
}
generate_interaction_plot_configs <- function(df, variables) {
configs <- list()
@@ -738,46 +707,90 @@ generate_interaction_plot_configs <- function(df, variables) {
))
}
generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) {
# Adjust missing values and compute ranks for each interaction variable
for (var in interaction_vars) {
avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var)
if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
# Replace NA with 0.001 for interaction variables
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
# Compute ranks for interaction variables
df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
} else {
warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame"))
}
}
# Initialize list to store plot configurations
configs <- list()
plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
# Generate plot configurations for rank variables
for (var in rank_vars) {
if (is_lm) {
rank_var <- paste0("Rank_lm_", var)
zscore_var <- paste0("Z_lm_", var)
plot_title_prefix <- "Interaction Z score vs. Rank for"
} else {
rank_var <- paste0("Rank_", var)
zscore_var <- paste0("Avg_Zscore_", var)
plot_title_prefix <- "Average Z score vs. Rank for"
}
# Single config with all sd bands
configs[[length(configs) + 1]] <- list(
df = df,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
title = paste(plot_title_prefix, var, "Rank Plot"),
sd_band = c(1, 2, 3), # Pass all sd bands at once
enhancer_label = list(
x = nrow(df) / 2, y = 10,
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ])) # Example for the first SD band
),
suppressor_label = list(
x = nrow(df) / 2, y = -10,
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ]))
),
shape = 3,
size = 0.1
)
enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ])
suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ])
# Non-annotated version
configs[[length(configs) + 1]] <- list(
df = df,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
sd_band = c(1, 2, 3),
enhancer_label = NULL,
suppressor_label = NULL,
shape = 3,
size = 0.1
)
# Create Annotated Plot Configuration for rank variables
config_annotated <- list(
df = df,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
title = paste(plot_title_prefix, var, "Rank Plot"),
sd_band = c(1, 2, 3),
enhancer_label = list(
x = nrow(df) / 2,
y = 10,
label = paste("Deletion Enhancers =", enhancers_count)
),
suppressor_label = list(
x = nrow(df) / 2,
y = -10,
label = paste("Deletion Suppressors =", suppressors_count)
),
shape = 3,
size = 0.1
)
return(configs)
# Create Non-Annotated Plot Configuration for rank variables
config_no_annotated <- list(
df = df,
x_var = rank_var,
y_var = zscore_var,
plot_type = "scatter",
title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
sd_band = c(1, 2, 3),
enhancer_label = NULL,
suppressor_label = NULL,
shape = 3,
size = 0.1
)
# Append configurations to the list
configs[[length(configs) + 1]] <- config_annotated
configs[[length(configs) + 1]] <- config_no_annotated
}
return(list(
adjusted_df = df,
plot_configs = configs
))
}
generate_correlation_plot_configs <- function(df, variables) {
@@ -1214,24 +1227,41 @@ main <- function() {
write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
# TODO needs explanation
message("Adjusting and ranking interactions scores")
zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions, variables = interaction_vars)
message("Generating rank plots")
rank_plot_configs <- c(
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
)
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
rank_vars <- c("L", "K")
rank_lm_plot_config <- c(
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
# Generate rank plot configurations and adjust the dataframe
rank_plot_results <- generate_rank_plot_configs(
df = zscores_interactions,
interaction_vars = interaction_vars,
rank_vars = rank_vars,
is_lm = FALSE
)
zscores_interactions_adjusted <- rank_plot_results$adjusted_df
# Generate and save standard rank plots
generate_and_save_plots(
output_dir = out_dir,
file_name = "RankPlots",
plot_configs = rank_plot_results$plot_configs,
grid_layout = list(ncol = 3, nrow = 2)
)
# Generate rank plot configurations for lm variables
rank_lm_plot_results <- generate_rank_plot_configs(
df = zscores_interactions_adjusted,
interaction_vars = interaction_vars,
rank_vars = rank_vars,
is_lm = TRUE
)
# Generate and save lm rank plots
generate_and_save_plots(
output_dir = out_dir,
file_name = "RankPlots_lm",
plot_configs = rank_lm_plot_results$plot_configs,
grid_layout = list(ncol = 3, nrow = 2)
)
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
# Formerly X_NArm
zscores_interactions_filtered <- zscores_interactions %>%