diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 30e7d1ed..f672ea28 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -457,7 +457,7 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la } generate_scatter_plot <- function(plot, config) { - + if (!is.null(config$delta_bg_point) && config$delta_bg_point) { plot <- plot + geom_point( shape = config$shape %||% 3, @@ -472,31 +472,32 @@ generate_scatter_plot <- function(plot, config) { } else { plot <- plot + geom_point( shape = config$shape %||% 3, - position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity" + position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity", + size = config$size %||% 0.1 ) } - + # Add smooth line if specified if (!is.null(config$add_smooth) && config$add_smooth) { - plot <- if (!is.null(config$lm_line)) { - plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope) + if (!is.null(config$lm_line)) { + plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "blue") } else { - plot + geom_smooth(method = "lm", se = FALSE) + plot <- plot + geom_smooth(method = "lm", se = FALSE, color = "blue") } } - - # Add SD bands (iterate over sd_band only here) - if (!is.null(config$sd_band)) { - for (i in config$sd_band) { + + # Add SD bands if specified + if (!is.null(config$sd_band_values)) { + for (sd_band in config$sd_band_values) { plot <- plot + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-i, i), color = "gray") + annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_band, ymax = Inf, fill = "#542788", alpha = 0.3) + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_band, ymax = -Inf, fill = "orange", alpha = 0.3) + + geom_hline(yintercept = c(-sd_band, sd_band), color = "gray") } } - + # Add error bars if specified - if (!is.null(config$error_bar) && config$error_bar) { + if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) { y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) plot <- plot + geom_errorbar( @@ -507,7 +508,7 @@ generate_scatter_plot <- function(plot, config) { alpha = 0.3 ) } - + # Add x-axis customization if specified if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { plot <- plot + scale_x_discrete( @@ -516,17 +517,17 @@ generate_scatter_plot <- function(plot, config) { labels = config$x_labels ) } - + # Use coord_cartesian for zooming in without removing data outside the range if (!is.null(config$coord_cartesian)) { plot <- plot + coord_cartesian(ylim = config$coord_cartesian) } - + # Use scale_y_continuous for setting the y-axis limits if (!is.null(config$ylim_vals)) { plot <- plot + scale_y_continuous(limits = config$ylim_vals) } - + # Add annotations if specified if (!is.null(config$annotations)) { for (annotation in config$annotations) { @@ -534,19 +535,20 @@ generate_scatter_plot <- function(plot, config) { x = annotation$x, y = annotation$y, label = annotation$label, - na.rm = TRUE) + na.rm = TRUE + ) } } - + # Add titles and themes if specified if (!is.null(config$title)) { plot <- plot + ggtitle(config$title) } - + if (!is.null(config$legend_position)) { plot <- plot + theme(legend.position = config$legend_position) } - + return(plot) } @@ -671,81 +673,124 @@ generate_interaction_plot_configs <- function(df, variables) { } generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) { - + df_filtered <- filter_data(df, variables, missing = TRUE) - - for (var in variables) { - avg_zscore_col <- paste0("Avg_Zscore_", var) - z_lm_col <- paste0("Z_lm_", var) - rank_col <- paste0("Rank_", var) - rank_lm_col <- paste0("Rank_lm_", var) - - if (adjust) { - # Replace NA with 0.001 for interaction variables - df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) - df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) - } - - # Compute ranks for interaction variables - df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") - df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") - } + + # Define SD bands + sd_bands <- c(1, 2, 3) + + # Define variables for Avg ZScore and Rank Avg ZScore plots + avg_zscore_vars <- c("r", "L", "K", "AUC") # Initialize list to store plot configurations configs <- list() - # Generate plot configurations for rank variables (L and K) with sd bands + #### 1. SD-Based Plots for L and K #### for (var in c("L", "K")) { - if (is_lm) { - rank_var <- paste0("Rank_lm_", var) - zscore_var <- paste0("Z_lm_", var) - plot_title_prefix <- "Interaction Z score vs. Rank for" - } else { - rank_var <- paste0("Rank_", var) - zscore_var <- paste0("Avg_Zscore_", var) - plot_title_prefix <- "Average Z score vs. Rank for" - } - - # Create plot configurations for each SD band - for (sd_band in c(1, 2, 3)) { - # Annotated version + for (sd_band in sd_bands) { + + # Determine columns based on whether it's lm or not + if (is_lm) { + rank_var <- paste0(var, "_Rank_lm") + zscore_var <- paste0("Z_lm_", var) + y_label <- paste("Int Z score", var) + } else { + rank_var <- paste0(var, "_Rank") + zscore_var <- paste0("Avg_Zscore_", var) + y_label <- paste("Avg Z score", var) + } + + # Annotated Plot Configuration configs[[length(configs) + 1]] <- list( - df = df, + df = df_filtered, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", - title = paste(plot_title_prefix, var, "above", sd_band, "SD"), + title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD"), sd_band = sd_band, - enhancer_label = list( - x = nrow(df) / 2, - y = 10, - label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ])) - ), - suppressor_label = list( - x = nrow(df) / 2, - y = -10, - label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) + annotations = list( + list( + x = median(df_filtered[[rank_var]], na.rm = TRUE), + y = 10, + label = paste("Deletion Enhancers =", sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)) + ), + list( + x = median(df_filtered[[rank_var]], na.rm = TRUE), + y = -10, + label = paste("Deletion Suppressors =", sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)) + ) ), + sd_band_values = sd_band, shape = 3, size = 0.1 ) - - # Non-annotated version (_notext) + + # Non-Annotated Plot Configuration configs[[length(configs) + 1]] <- list( - df = df, + df = df_filtered, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", - title = paste(plot_title_prefix, var, "above", sd_band, "SD No Annotations"), + title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD No Annotations"), sd_band = sd_band, - enhancer_label = NULL, - suppressor_label = NULL, + annotations = NULL, + sd_band_values = sd_band, shape = 3, size = 0.1 ) } } - + + #### 2. Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC #### + for (var in avg_zscore_vars) { + for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) { + + # Define x and y variables based on plot type + if (plot_type == "Avg_Zscore_vs_lm") { + x_var <- paste0("Avg_Zscore_", var) + y_var <- paste0("Z_lm_", var) + title_suffix <- paste("Avg Zscore vs lm", var) + } else if (plot_type == "Rank_Avg_Zscore_vs_lm") { + x_var <- paste0(var, "_Rank") + y_var <- paste0(var, "_Rank_lm") + title_suffix <- paste("Rank Avg Zscore vs lm", var) + } + + # Determine y-axis label + if (plot_type == "Avg_Zscore_vs_lm") { + y_label <- paste("Z lm", var) + } else { + y_label <- paste("Rank lm", var) + } + + # Determine correlation text (R-squared) + lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered) + r_squared <- summary(lm_fit)$r.squared + + # Plot Configuration + configs[[length(configs) + 1]] <- list( + df = df_filtered, + x_var = x_var, + y_var = y_var, + plot_type = "scatter", + title = title_suffix, + annotations = list( + list( + x = 0, + y = 0, + label = paste("R-squared =", round(r_squared, 2)) + ) + ), + sd_band_values = NULL, # Not applicable + shape = 3, + size = 0.1, + add_smooth = TRUE, + lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]), + legend_position = "right" + ) + } + } + return(configs) } @@ -1043,15 +1088,15 @@ main <- function() { ) message("Generating quality control plots") - # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) - # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) - # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) - # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) - # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) + generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) + generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) + generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) + generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) + generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) + generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # TODO: Originally this filtered L NA's # Let's try to avoid for now since stats have already been calculated