From fbbe351cbb87af6583ad87208788d7b8a56178b0 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 25 Sep 2024 01:57:14 -0400 Subject: [PATCH] Move ranks into rank_plots function --- .../apps/r/calculate_interaction_zscores.R | 107 ++++++++---------- 1 file changed, 48 insertions(+), 59 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 987aa1a3..1161c7c0 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -118,7 +118,7 @@ scale_colour_publication <- function(...) { } # Load the initial dataframe from the easy_results_file -load_and_process_data <- function(easy_results_file, sd = 3) { +load_and_filter_data <- function(easy_results_file, sd = 3) { df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) df <- df %>% @@ -656,7 +656,7 @@ generate_interaction_plot_configs <- function(df, variables) { AUC = c(-6500, 6500) ) - df_filtered <- process_data(df, variables, filter_na = TRUE, limits_map = limits_map) + df_filtered <- filter_data(df, variables, filter_na = TRUE, limits_map = limits_map) # Define annotation label functions generate_annotation_labels <- function(df, var, annotation_name) { @@ -747,11 +747,34 @@ generate_interaction_plot_configs <- function(df, variables) { return(configs) } -generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, overlap_color = FALSE) { +generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, overlap_color = FALSE) { sd_bands <- c(1, 2, 3) + + avg_zscore_cols <- paste0("Avg_Zscore_", variables) + z_lm_cols <- paste0("Z_lm_", variables) + rank_avg_zscore_cols <- paste0("Rank_", variables) + rank_z_lm_cols <- paste0("Rank_lm_", variables) configs <- list() + + if (adjust) { + message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks") + df <- df %>% + mutate( + across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)), + across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .)) + ) + } + + message("Calculating ranks for Avg_Zscore and Z_lm columns") + rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols) + df_ranked <- df %>% + mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}")) + + rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols) + df_ranked <- df_ranked %>% + mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}")) # SD-based plots for L and K for (variable in c("L", "K")) { @@ -768,12 +791,12 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov for (sd_band in sd_bands) { - num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE) - num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE) + num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE) + num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE) # Annotated plot configuration configs[[length(configs) + 1]] <- list( - df = df_filtered, + df = df_ranked, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", @@ -785,14 +808,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov alpha_negative = 0.3, annotations = list( list( - x = median(df_filtered[[rank_var]], na.rm = TRUE), + x = median(df_ranked[[rank_var]], na.rm = TRUE), y = 10, label = paste("Deletion Enhancers =", num_enhancers), hjust = 0.5, vjust = 1 ), list( - x = median(df_filtered[[rank_var]], na.rm = TRUE), + x = median(df_ranked[[rank_var]], na.rm = TRUE), y = -10, label = paste("Deletion Suppressors =", num_suppressors), hjust = 0.5, @@ -808,7 +831,7 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov # Non-Annotated Plot Configuration configs[[length(configs) + 1]] <- list( - df = df_filtered, + df = df_ranked, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", @@ -849,30 +872,29 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, ov rectangles <- NULL } - # Fit linear model - lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_filtered) - lm_summary <- summary(lm_model) + # Fit the linear model + lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked) # Extract intercept and slope from the model coefficients intercept <- coef(lm_model)[1] slope <- coef(lm_model)[2] configs[[length(configs) + 1]] <- list( - df = df_filtered, + df = df_ranked, x_var = x_var, y_var = y_var, plot_type = "scatter", title = title, annotations = list( list( - x = median(df_filtered[[rank_var]], na.rm = TRUE), + x = median(df_ranked[[rank_var]], na.rm = TRUE), y = 10, label = paste("Deletion Enhancers =", num_enhancers), hjust = 0.5, vjust = 1 ), list( - x = median(df_filtered[[rank_var]], na.rm = TRUE), + x = median(df_ranked[[rank_var]], na.rm = TRUE), y = -10, label = paste("Deletion Suppressors =", num_suppressors), hjust = 0.5, @@ -955,8 +977,7 @@ generate_correlation_plot_configs <- function(df) { return(configs) } -process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, adjust = FALSE, - rank = FALSE, limits_map = NULL) { +filter_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, limits_map = NULL) { avg_zscore_cols <- paste0("Avg_Zscore_", variables) z_lm_cols <- paste0("Z_lm_", variables) @@ -986,29 +1007,6 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad } } - if (adjust) { - message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks") - df <- df %>% - mutate( - across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)), - across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .)) - ) - } - - # Calculate and add rank columns - # TODO probably should be moved to separate function - if (rank) { - message("Calculating ranks for Avg_Zscore and Z_lm columns") - - rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols) - df <- df %>% - mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}")) - - rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols) - df <- df %>% - mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}")) - } - return(df) } @@ -1028,7 +1026,7 @@ main <- function() { "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB") message("Loading and filtering data for experiment: ", exp_name) - df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>% + df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>% update_gene_names(args$sgd_gene_list) %>% as_tibble() @@ -1399,25 +1397,21 @@ main <- function() { file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) message("Generating rank plots") - # Formerly InteractionScores_AdjustMissing - zscores_interactions_joined_ranked <- process_data( + rank_plot_configs <- generate_rank_plot_configs( df = zscores_interactions_joined, variables = interaction_vars, - adjust = TRUE, - rank = TRUE) - rank_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_joined_ranked, - variables = interaction_vars, - is_lm = FALSE + is_lm = FALSE, + adjust = TRUE ) generate_and_save_plots(out_dir = out_dir, filename = "RankPlots", plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Generating ranked linear model plots") rank_lm_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_joined_ranked, + df = zscores_interactions_joined, variables = interaction_vars, - is_lm = TRUE + is_lm = TRUE, + adjust = TRUE ) generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm", plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) @@ -1444,21 +1438,15 @@ main <- function() { lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared ) - # Re-rank - zscores_interactions_filtered_ranked <- process_data( - df = zscores_interactions_filtered, - variables = interaction_vars, - rank = TRUE - ) - + message("Generating filtered ranked plots") rank_plot_filtered_configs <- generate_rank_plot_configs( df = zscores_interactions_filtered_ranked, variables = interaction_vars, is_lm = FALSE, + adjust = FALSE, overlap_color = TRUE ) - message("Generating filtered ranked plots") generate_and_save_plots( out_dir = out_dir, filename = "RankPlots_na_rm", @@ -1470,6 +1458,7 @@ main <- function() { df = zscores_interactions_filtered_ranked, variables = interaction_vars, is_lm = TRUE, + adjust = FALSE, overlap_color = TRUE ) generate_and_save_plots(