From 3833f751849e8a7d0aec76de06ea10562eccfbfb Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Tue, 10 Sep 2024 03:08:15 -0400 Subject: [PATCH] Add rank plot configs --- .../apps/r/calculate_interaction_zscores5.R | 183 ++++++++++++------ 1 file changed, 119 insertions(+), 64 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 004a16cf..556c4c95 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -231,16 +231,17 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c ungroup() interaction_scores <- interaction_scores %>% - mutate( - Raw_Shift_L = mean_L[[1]] - bg_means$L, - Raw_Shift_K = mean_K[[1]] - bg_means$K, - Raw_Shift_r = mean_r[[1]] - bg_means$r, - Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, - Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]], - Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]], - Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]], - Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]] - ) + group_by(across(all_of(group_vars))) %>% + mutate( + Raw_Shift_L = mean_L[[1]] - bg_means$L, + Raw_Shift_K = mean_K[[1]] - bg_means$K, + Raw_Shift_r = mean_r[[1]] - bg_means$r, + Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, + Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]], + Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]], + Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]], + Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]] + ) interaction_scores <- interaction_scores %>% mutate( @@ -263,28 +264,33 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) ) - # Calculate Z-scores for each variable + # Calculate linear models and interaction scores interaction_scores <- interaction_scores %>% mutate( + lm_L = lm(Delta_L ~ conc_num_factor), + lm_K = lm(Delta_K ~ conc_num_factor), + lm_r = lm(Delta_r ~ conc_num_factor), + lm_AUC = lm(Delta_AUC ~ conc_num_factor), Zscore_L = Delta_L / WT_sd_L, Zscore_K = Delta_K / WT_sd_K, Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC ) - # Calculate linear models interaction_scores <- interaction_scores %>% mutate( - lm_L = lm(Delta_L ~ conc_num_factor), - lm_K = lm(Delta_K ~ conc_num_factor), - lm_r = lm(Delta_r ~ conc_num_factor), - lm_AUC = Dlm(Delta_AUC ~ conc_num_factor) + Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), + Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), + Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), + Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) ) - # Calculate linear models and interaction scores interaction_scores_all <- interaction_scores %>% - group_by(across(all_of(group_vars))) %>% mutate( + Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, + Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, + Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), + Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1), lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1], lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1], lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1], @@ -301,11 +307,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE), Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE), Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), - Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE), - Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / num_non_removed_concs, - Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / num_non_removed_concs, - Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1), - Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1) + Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) ) # Arrange results by Z_lm_L and NG @@ -541,7 +543,6 @@ adjust_missing_and_rank <- function(df, variables) { return(df) } - main <- function() { lapply(names(args$experiments), function(exp_name) { exp <- args$experiments[[exp_name]] @@ -625,7 +626,8 @@ main <- function() { list(df = df, x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", color_var = "conc_num", - legend_position = "right") + legend_position = "right" + ) ) above_threshold_plots <- list( @@ -678,15 +680,10 @@ main <- function() { error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE) # Create the plot configuration - plot_config <- list( - df = df_plot, - x_var = "scan", - y_var = var, - plot_type = plot_type, + plot_config <- list(df = df_plot, x_var = "scan", y_var = var, plot_type = plot_type, title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), - error_bar = error_bar, - color_var = "conc_num" - ) + error_bar = error_bar, color_var = "conc_num") + plate_analysis_plots <- append(plate_analysis_plots, list(plot_config)) } } @@ -718,14 +715,16 @@ main <- function() { list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", - legend_position = "right") + legend_position = "right" + ) ) delta_bg_outside_2sd_k_plots <- list( list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", - legend_position = "right") + legend_position = "right" + ) ) # Generate and save plots for each QC step @@ -744,7 +743,6 @@ main <- function() { # TODO: Originally this filtered L NA's # Let's try to avoid for now since stats have already been calculated - # Process background strains bg_strains <- c("YDL227C") lapply(bg_strains, function(strain) { @@ -847,10 +845,10 @@ main <- function() { # Save enhancers and suppressors message("Writing enhancer/suppressor csv files") - write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) - write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) - write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) - write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) + write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L.csv"), row.names = FALSE) + write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L.csv"), row.names = FALSE) + write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K.csv"), row.names = FALSE) + write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K.csv"), row.names = FALSE) # Combine conditions for enhancers and suppressors enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ] @@ -858,9 +856,9 @@ main <- function() { # Save combined enhancers and suppressors write.csv(enhancers_and_suppressors_L, - file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_L.csv"), row.names = FALSE) write.csv(enhancers_and_suppressors_K, - file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_K.csv"), row.names = FALSE) # Handle linear model based enhancers and suppressors lm_threshold <- 2 @@ -872,37 +870,94 @@ main <- function() { # Save linear model based enhancers and suppressors message("Writing linear model enhancer/suppressor csv files") write.csv(enhancers_lm_L, - file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L_lm.csv"), row.names = FALSE) write.csv(suppressors_lm_L, - file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L_lm.csv"), row.names = FALSE) write.csv(enhancers_lm_K, - file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K_lm.csv"), row.names = FALSE) write.csv(suppressors_lm_K, - file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) + file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) - # Correlation plots - lm_list <- list( - lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), - lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered), - lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered), - lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), - lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), - lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) + zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions) + + # Generate ranked plots + rank_plot_config <- list( + # L Rank plots with different SD thresholds + list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank", + title = "Average Z score vs. Rank for L above 1SD", sd_band = 1, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 1, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -1, ]))) + ), + list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank", + title = "Average Z score vs. Rank for L above 2SD", sd_band = 2, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 2, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -2, ]))) + ), + list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank", + title = "Average Z score vs. Rank for L above 3SD", sd_band = 3, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 3, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -3, ]))) + ), + + # K Rank plots with different SD thresholds + list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank", + title = "Average Z score vs. Rank for K above 1SD", sd_band = 1, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 1, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -1, ]))) + ), + list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank", + title = "Average Z score vs. Rank for K above 2SD", sd_band = 2, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 2, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -2, ]))) + ), + list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank", + title = "Average Z score vs. Rank for K above 3SD", sd_band = 3, + enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10, + label = paste("Deletion Enhancers =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 3, ]))), + suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10, + label = paste("Deletion Suppressors =", + nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -3, ]))) + ) ) + # Generate and save rank plots + generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", + plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2)) + + # # Correlation plots + # lm_list <- list( + # lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), + # lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered), + # lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered), + # lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), + # lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), + # lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) + # ) + lm_summaries <- lapply(lm_list, summary) correlation_plot_configs <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries) generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs) - - # Generate ranked plots - rank_plot_config <- list( - list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"), - list(x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "scatter", title = "Rank vs Avg Z score for K"), - list(x_var = "r_Rank", y_var = "Avg_Zscore_r", plot_type = "scatter", title = "Rank vs Avg Z score for r"), - list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC") - ) - # Generate and save rank plots using the existing plotting framework - generate_and_save_plots(zscores_interactions_filtered, output_dir, rank_plot_config) }) }) }