From 5518005d033feba0037f9da21eac0c9528214022 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Tue, 10 Sep 2024 17:50:15 -0400 Subject: [PATCH] Add correlation plots --- .../apps/r/calculate_interaction_zscores.R | 192 +++++++++++------- 1 file changed, 114 insertions(+), 78 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index d8885230..aff94b7c 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -530,60 +530,6 @@ generate_interaction_plot_configs <- function(df, variables) { return(plot_configs) } -generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix, is_lm = FALSE) { - configs <- list() - - # Adjust titles for _lm plots if is_lm is TRUE - plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for" - - for (sd_band in c(1, 2, 3)) { - # Annotated version (with text) - configs[[length(configs) + 1]] <- list( - df = df, - x_var = rank_var, - y_var = zscore_var, - plot_type = "rank", - title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"), - sd_band = sd_band, - enhancer_label = list( - x = nrow(df) / 2, y = 10, - label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ])) - ), - suppressor_label = list( - x = nrow(df) / 2, y = -10, - label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) - ) - ) - - # Non-annotated version (_notext) - configs[[length(configs) + 1]] <- list( - df = df, - x_var = rank_var, - y_var = zscore_var, - plot_type = "rank", - title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"), - sd_band = sd_band, - enhancer_label = NULL, # No annotations for _notext - suppressor_label = NULL # No annotations for _notext - ) - } - - return(configs) -} - -generate_correlation_plot_configs <- function(df, lm_list, lm_summaries) { - lapply(seq_along(lm_list), function(i) { - r_squared <- round(lm_summaries[[i]]$r.squared, 3) - list( - x_var = names(lm_list)[i][1], - y_var = names(lm_list)[i][2], - plot_type = "scatter", - title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]), - annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared))) - ) - }) -} - # Adjust missing values and calculate ranks adjust_missing_and_rank <- function(df, variables) { @@ -599,6 +545,71 @@ adjust_missing_and_rank <- function(df, variables) { return(df) } +generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) { + configs <- list() + + # Adjust titles for _lm plots if is_lm is TRUE + plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for" + + # Annotated version (with text) + for (sd_band in c(1, 2, 3)) { + configs[[length(configs) + 1]] <- list( + df = df, + x_var = rank_var, + y_var = zscore_var, + plot_type = "rank", + title = paste(plot_title_prefix, var, "above", sd_band, "SD"), + sd_band = sd_band, + enhancer_label = list( + x = nrow(df) / 2, y = 10, + label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ])) + ), + suppressor_label = list( + x = nrow(df) / 2, y = -10, + label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) + ) + ) + } + + # Non-annotated version (_notext) + for (sd_band in c(1, 2, 3)) { + configs[[length(configs) + 1]] <- list( + df = df, + x_var = rank_var, + y_var = zscore_var, + plot_type = "rank", + title = paste(plot_title_prefix, var, "above", sd_band, "SD"), + sd_band = sd_band, + enhancer_label = NULL, # No annotations for _notext + suppressor_label = NULL # No annotations for _notext + ) + } + + return(configs) +} + +generate_correlation_plot_configs <- function(df, variables) { + plot_configs <- list() + + for (variable in variables) { + z_lm_var <- paste0("Z_lm_", variable) + avg_zscore_var <- paste0("Avg_Zscore_", variable) + lm_r_squared_col <- paste0("lm_R_squared_", variable) + + plot_configs[[length(plot_configs) + 1]] <- list( + df = df, + x_var = avg_zscore_var, + y_var = z_lm_var, + plot_type = "correlation", + title = paste("Avg Zscore vs lm", variable), + color_var = "Overlap", + correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2)) + ) + } + + return(plot_configs) +} + main <- function() { lapply(names(args$experiments), function(exp_name) { exp <- args$experiments[[exp_name]] @@ -674,10 +685,8 @@ main <- function() { write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE) write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE) - # Plots - - # Print quality control graphs before removing data due to contamination and - # adjusting missing data to max theoretical values + # Plot configurations + # Each plots list corresponds to a file message("Generating QC plot configurations") l_vs_k_plots <- list( list(df = df, x_var = "L", y_var = "K", plot_type = "scatter", @@ -820,9 +829,9 @@ main <- function() { # Recalculate summary statistics for the background strain message("Calculating summary statistics for background strain") - ss <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) - summary_stats_bg <- ss$summary_stats - df_bg_stats <- ss$df_with_stats + ss_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) + summary_stats_bg <- ss_bg$summary_stats + # df_bg_stats <- ss_bg$df_with_stats write.csv(summary_stats_bg, file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), row.names = FALSE) @@ -935,6 +944,7 @@ main <- function() { write.csv(suppressors_lm_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) + # TODO needs explanation zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions) rank_plot_configs <- c( @@ -945,30 +955,56 @@ main <- function() { plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2)) rank_lm_plot_config <- c( - generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L"), - generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K") + generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), + generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE) ) generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2)) - # interaction_scores_filtered + # Formerly X_NArm + zscores_interactions_filtered <- zscores_interactions %>% + group_by(across(all_of(group_vars))) %>% + filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) + + zscores_interactions_filtered <- zscores_interactions_filtered %>% + mutate( + Overlap = case_when( + Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", + Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both", + Z_lm_L >= 2 & Avg_Zscore_L <= 2 ~ "Deletion Enhancer lm only", + Z_lm_L <= -2 & Avg_Zscore_L >= -2 ~ "Deletion Suppressor lm only", + Z_lm_L >= 2 & Avg_Zscore_L <= -2 ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", + Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", + TRUE ~ "No Effect" + ), + lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, + lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, + lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, + lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared + ) %>% + ungroup() + rank_plot_configs <- c( + generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"), + generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K") + ) + + generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", + plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) + + rank_lm_plot_configs <- c( + generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), + generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE) + ) + + generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", + plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) + + correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, variables) + generate_and_save_plots(output_dir = out_dir, file_name = "Avg_Zscore_vs_lm_NA_rm", + plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2)) - # lm_summaries <- lapply(lm_list, summary) - # correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries) - # generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs) }) }) } main() - - - # # Correlation plots - # lm_list <- list( - # lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), - # lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered), - # lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered), - # lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), - # lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), - # lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) - # ) \ No newline at end of file