diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index f672ea28..f6db17f7 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -672,20 +672,18 @@ generate_interaction_plot_configs <- function(df, variables) { return(configs) } -generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) { - - df_filtered <- filter_data(df, variables, missing = TRUE) +generate_rank_plot_configs <- function(df_filtered, is_lm = FALSE, adjust = FALSE) { # Define SD bands sd_bands <- c(1, 2, 3) # Define variables for Avg ZScore and Rank Avg ZScore plots - avg_zscore_vars <- c("r", "L", "K", "AUC") + variables <- c("r", "L", "K", "AUC") # Initialize list to store plot configurations configs <- list() - #### 1. SD-Based Plots for L and K #### + # SD-based plots for L and K for (var in c("L", "K")) { for (sd_band in sd_bands) { @@ -741,8 +739,8 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA } } - #### 2. Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC #### - for (var in avg_zscore_vars) { + # Average ZScore and Rank Avg ZScore Plots for r, L, K, and AUC + for (var in variables) { for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) { # Define x and y variables based on plot type @@ -821,9 +819,22 @@ generate_correlation_plot_configs <- function(df, variables) { return(configs) } -filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL, verbose = TRUE) { +filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE, + limits_map = NULL, verbose = TRUE) { for (variable in variables) { + + avg_zscore_col <- paste0("Avg_Zscore_", var) + z_lm_col <- paste0("Z_lm_", var) + rank_col <- paste0("Rank_", var) + rank_lm_col <- paste0("Rank_lm_", var) + + if (adjust) { + message("Replacing NA with 0.001 for interaction variables") + df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) + df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) + } + if (nf) { non_finite <- df %>% filter(!is.finite(.data[[variable]])) if (verbose && nrow(non_finite) > 0) { @@ -1242,33 +1253,27 @@ main <- function() { file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) message("Generating rank plots") - # Generate rank plots for L and K using standard ranks + zscores_interactions_joined_filtered <- filter_data(zscores_interactions_joined, variables, missing = TRUE, adjust = TRUE) rank_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_joined, + df = zscores_interactions_joined_filtered, variables = interaction_vars, is_lm = FALSE, adjust = TRUE ) - - # Save the generated rank plots for L and K generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Generating ranked linear model plots") - # Generate rank plots for L and K using linear model (`lm`) ranks rank_lm_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_joined, + df = zscores_interactions_joined_filtered, variables = interaction_vars, is_lm = TRUE, adjust = TRUE ) - - # Save the linear model based rank plots for L and K generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Filtering and reranking plots") - # Filter rows where either Z_lm_L or Avg_Zscore_L is not NA # Formerly X_NArm zscores_interactions_filtered <- zscores_interactions %>% group_by(across(all_of(orf_group_vars))) %>% @@ -1296,7 +1301,6 @@ main <- function() { message("Generating filtered ranked plots") rank_plot_filtered_configs <- generate_rank_plot_configs( df = zscores_interactions_filtered, - variables = interaction_vars, is_lm = FALSE, adjust = FALSE ) @@ -1307,7 +1311,6 @@ main <- function() { message("Generating filtered ranked linear model plots") rank_plot_lm_filtered_configs <- generate_rank_plot_configs( df = zscores_interactions_filtered, - variables = interaction_vars, is_lm = TRUE, adjust = FALSE )