Refactor rank plots

This commit is contained in:
2024-09-15 19:32:25 -04:00
parent eb2e1ae7d6
commit e00df4be45

View File

@@ -567,37 +567,6 @@ generate_box_plot <- function(plot, config) {
return(plot) return(plot)
} }
# Adjust missing values and calculate ranks
adjust_missing_and_rank <- function(df, variables) {
# Loop over each variable
for (var in variables) {
# Construct column names
avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var)
# Check if the columns exist in the data frame
if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
# Adjust missing values by replacing NA with 0.001
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
# Compute ranks and create new columns
df[[rank_col]] <- rank(df[[avg_zscore_col]])
df[[rank_lm_col]] <- rank(df[[z_lm_col]])
} else {
warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in data frame"))
}
# Print some debugging output
print(df %>% select("OrfRep", "Gene", "conc_num", "conc_num_factor",
avg_zscore_col, z_lm_col, rank_col, rank_lm_col))
}
return(df)
}
generate_interaction_plot_configs <- function(df, variables) { generate_interaction_plot_configs <- function(df, variables) {
configs <- list() configs <- list()
@@ -738,33 +707,69 @@ generate_interaction_plot_configs <- function(df, variables) {
)) ))
} }
generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) { generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) {
# Adjust missing values and compute ranks for each interaction variable
for (var in interaction_vars) {
avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var)
if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
# Replace NA with 0.001 for interaction variables
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
# Compute ranks for interaction variables
df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
} else {
warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame"))
}
}
# Initialize list to store plot configurations
configs <- list() configs <- list()
plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for" # Generate plot configurations for rank variables
for (var in rank_vars) {
if (is_lm) {
rank_var <- paste0("Rank_lm_", var)
zscore_var <- paste0("Z_lm_", var)
plot_title_prefix <- "Interaction Z score vs. Rank for"
} else {
rank_var <- paste0("Rank_", var)
zscore_var <- paste0("Avg_Zscore_", var)
plot_title_prefix <- "Average Z score vs. Rank for"
}
# Single config with all sd bands enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ])
configs[[length(configs) + 1]] <- list( suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ])
# Create Annotated Plot Configuration for rank variables
config_annotated <- list(
df = df, df = df,
x_var = rank_var, x_var = rank_var,
y_var = zscore_var, y_var = zscore_var,
plot_type = "scatter", plot_type = "scatter",
title = paste(plot_title_prefix, var, "Rank Plot"), title = paste(plot_title_prefix, var, "Rank Plot"),
sd_band = c(1, 2, 3), # Pass all sd bands at once sd_band = c(1, 2, 3),
enhancer_label = list( enhancer_label = list(
x = nrow(df) / 2, y = 10, x = nrow(df) / 2,
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ])) # Example for the first SD band y = 10,
label = paste("Deletion Enhancers =", enhancers_count)
), ),
suppressor_label = list( suppressor_label = list(
x = nrow(df) / 2, y = -10, x = nrow(df) / 2,
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ])) y = -10,
label = paste("Deletion Suppressors =", suppressors_count)
), ),
shape = 3, shape = 3,
size = 0.1 size = 0.1
) )
# Non-annotated version # Create Non-Annotated Plot Configuration for rank variables
configs[[length(configs) + 1]] <- list( config_no_annotated <- list(
df = df, df = df,
x_var = rank_var, x_var = rank_var,
y_var = zscore_var, y_var = zscore_var,
@@ -777,7 +782,15 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
size = 0.1 size = 0.1
) )
return(configs) # Append configurations to the list
configs[[length(configs) + 1]] <- config_annotated
configs[[length(configs) + 1]] <- config_no_annotated
}
return(list(
adjusted_df = df,
plot_configs = configs
))
} }
generate_correlation_plot_configs <- function(df, variables) { generate_correlation_plot_configs <- function(df, variables) {
@@ -1214,24 +1227,41 @@ main <- function() {
write.csv(suppressors_lm_K, write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
# TODO needs explanation
message("Adjusting and ranking interactions scores")
zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions, variables = interaction_vars)
message("Generating rank plots") message("Generating rank plots")
rank_plot_configs <- c( rank_vars <- c("L", "K")
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
)
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
rank_lm_plot_config <- c( # Generate rank plot configurations and adjust the dataframe
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), rank_plot_results <- generate_rank_plot_configs(
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE) df = zscores_interactions,
interaction_vars = interaction_vars,
rank_vars = rank_vars,
is_lm = FALSE
)
zscores_interactions_adjusted <- rank_plot_results$adjusted_df
# Generate and save standard rank plots
generate_and_save_plots(
output_dir = out_dir,
file_name = "RankPlots",
plot_configs = rank_plot_results$plot_configs,
grid_layout = list(ncol = 3, nrow = 2)
)
# Generate rank plot configurations for lm variables
rank_lm_plot_results <- generate_rank_plot_configs(
df = zscores_interactions_adjusted,
interaction_vars = interaction_vars,
rank_vars = rank_vars,
is_lm = TRUE
)
# Generate and save lm rank plots
generate_and_save_plots(
output_dir = out_dir,
file_name = "RankPlots_lm",
plot_configs = rank_lm_plot_results$plot_configs,
grid_layout = list(ncol = 3, nrow = 2)
) )
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
# Formerly X_NArm # Formerly X_NArm
zscores_interactions_filtered <- zscores_interactions %>% zscores_interactions_filtered <- zscores_interactions %>%