# Build scatter-plot configurations for rank / Z-score comparison plots.
#
# Two families of configurations are appended to the result, in this order:
#   1. For each of "L" and "K" and each SD band (1, 2, 3): an annotated
#      rank-vs-Z-score config followed by an otherwise identical config with
#      `annotations = NULL`.
#   2. For each entry of `variables`: an "Avg Zscore vs lm" config and a
#      "Rank Avg Zscore vs lm" config, each carrying the coefficients and
#      R-squared of a simple linear regression of the y column on the x column.
#      A config is skipped when the regression cannot be estimated or is
#      numerically perfect.
#
# Args:
#   df_filtered: Data frame stored unchanged in every config's `df` element.
#     Columns read here: `<var>_Rank`, `Avg_Zscore_<var>`, `<var>_Rank_lm`
#     and `Z_lm_<var>`.
#   variables: Character vector of variable names used for the regression
#     configs (family 2).
#   is_lm: If TRUE, family 1 uses `<var>_Rank_lm` / `Z_lm_<var>` and the label
#     "Int Z score"; otherwise `<var>_Rank` / `Avg_Zscore_<var>` and the label
#     "Avg Z score".
#
# Returns:
#   An unnamed list of plot configurations (each a named list).
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {

  sd_bands <- c(1, 2, 3)

  configs <- list()

  # SD-based plots for L and K
  for (variable in c("L", "K")) {
    # Column names and label do not depend on the SD band, so resolve them once
    if (is_lm) {
      rank_var <- paste0(variable, "_Rank_lm")
      zscore_var <- paste0("Z_lm_", variable)
      y_label <- paste("Int Z score", variable)
    } else {
      rank_var <- paste0(variable, "_Rank")
      zscore_var <- paste0("Avg_Zscore_", variable)
      y_label <- paste("Avg Z score", variable)
    }

    zscores <- df_filtered[[zscore_var]]
    rank_median <- median(df_filtered[[rank_var]], na.rm = TRUE)

    for (sd_band in sd_bands) {
      num_enhancers <- sum(zscores >= sd_band, na.rm = TRUE)
      num_suppressors <- sum(zscores <= -sd_band, na.rm = TRUE)
      base_title <- paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD")

      # Both configs of a pair differ only in title and annotations.
      # `annotations = NULL` inside list() keeps the (NULL) element present.
      make_sd_config <- function(title, annotations) {
        list(
          df = df_filtered,
          x_var = rank_var,
          y_var = zscore_var,
          plot_type = "scatter",
          title = title,
          sd_band = sd_band,
          annotations = annotations,
          sd_band_values = sd_band,
          shape = 3,
          size = 0.1,
          y_label = y_label,
          x_label = "Rank",
          legend_position = "none"
        )
      }

      # Annotated plot configuration
      configs[[length(configs) + 1]] <- make_sd_config(
        title = base_title,
        annotations = list(
          list(
            x = rank_median,
            y = 10,
            label = paste("Deletion Enhancers =", num_enhancers)
          ),
          list(
            x = rank_median,
            y = -10,
            label = paste("Deletion Suppressors =", num_suppressors)
          )
        )
      )

      # Non-annotated plot configuration
      configs[[length(configs) + 1]] <- make_sd_config(
        title = paste(base_title, "No Annotations"),
        annotations = NULL
      )
    }
  }

  # Avg ZScore and Rank Avg ZScore plots for every requested variable
  for (variable in variables) {
    for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
      # Define specific variables based on plot type
      if (plot_type == "Avg_Zscore_vs_lm") {
        x_var <- paste0("Avg_Zscore_", variable)
        y_var <- paste0("Z_lm_", variable)
        title_suffix <- paste("Avg Zscore vs lm", variable)
        rectangles <- list(
          list(
            xmin = -2, xmax = 2, ymin = -2, ymax = 2,
            fill = NA, color = "grey20", alpha = 0.1
          )
        )
      } else {
        x_var <- paste0(variable, "_Rank")
        y_var <- paste0(variable, "_Rank_lm")
        title_suffix <- paste("Rank Avg Zscore vs lm", variable)
        rectangles <- NULL
      }

      # A two-parameter line needs at least three complete (x, y) pairs to
      # leave a residual degree of freedom; with fewer, lm() either errors or
      # yields a NaN residual standard error.
      usable <- complete.cases(df_filtered[[x_var]], df_filtered[[y_var]])
      if (sum(usable) < 3) {
        next
      }

      # Fit linear model (the formula indexes df_filtered directly, so no
      # `data` argument is needed)
      lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]])

      # Skip (numerically) perfect fits. Comparing the residual standard error
      # with `== 0` misses fits whose residuals are pure rounding noise, so the
      # residual sum of squares is compared against the total sum of squares
      # at machine precision instead. A constant response (both sums zero) is
      # skipped as well.
      response <- df_filtered[[y_var]][usable]
      total_ss <- sum((response - mean(response))^2)
      residual_ss <- deviance(lm_fit)
      if (residual_ss <= total_ss * .Machine$double.eps) {
        next
      }

      r_squared <- summary(lm_fit)$r.squared
      fit_coefs <- coef(lm_fit)

      configs[[length(configs) + 1]] <- list(
        df = df_filtered,
        x_var = x_var,
        y_var = y_var,
        plot_type = "scatter",
        title = title_suffix,
        annotations = list(
          list(
            x = 0,
            y = 0,
            label = paste("R-squared =", round(r_squared, 2))
          )
        ),
        sd_band_values = NULL, # Not applicable
        shape = 3,
        size = 0.1,
        add_smooth = TRUE,
        lm_line = list(intercept = fit_coefs[1], slope = fit_coefs[2]),
        legend_position = "right",
        color_var = "Overlap",
        x_label = x_var,
        y_label = y_var,
        rectangles = rectangles
      )
    }
  }

  configs
}
"delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # Process background strains bg_strains <- c("YDL227C")