diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 67e24870..5d7a21bf 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -567,37 +567,6 @@ generate_box_plot <- function(plot, config) { return(plot) } -# Adjust missing values and calculate ranks -adjust_missing_and_rank <- function(df, variables) { - - # Loop over each variable - for (var in variables) { - # Construct column names - avg_zscore_col <- paste0("Avg_Zscore_", var) - z_lm_col <- paste0("Z_lm_", var) - rank_col <- paste0("Rank_", var) - rank_lm_col <- paste0("Rank_lm_", var) - - # Check if the columns exist in the data frame - if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) { - # Adjust missing values by replacing NA with 0.001 - df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) - df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) - - # Compute ranks and create new columns - df[[rank_col]] <- rank(df[[avg_zscore_col]]) - df[[rank_lm_col]] <- rank(df[[z_lm_col]]) - } else { - warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in data frame")) - } - # Print some debugging output - print(df %>% select("OrfRep", "Gene", "conc_num", "conc_num_factor", - avg_zscore_col, z_lm_col, rank_col, rank_lm_col)) - } - - return(df) -} - generate_interaction_plot_configs <- function(df, variables) { configs <- list() @@ -738,46 +707,90 @@ generate_interaction_plot_configs <- function(df, variables) { )) } -generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) { +generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) { + # Adjust missing values and compute ranks for each interaction variable + for (var in interaction_vars) { + avg_zscore_col <- paste0("Avg_Zscore_", var) + z_lm_col <- paste0("Z_lm_", var) + rank_col <- paste0("Rank_", var) + rank_lm_col <- paste0("Rank_lm_", var) + + if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) { + # Replace NA with 0.001 for interaction variables + df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) + df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) + + # Compute ranks for interaction variables + df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") + df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") + } else { + warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame")) + } + } + + # Initialize list to store plot configurations configs <- list() - plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for" + # Generate plot configurations for rank variables + for (var in rank_vars) { + if (is_lm) { + rank_var <- paste0("Rank_lm_", var) + zscore_var <- paste0("Z_lm_", var) + plot_title_prefix <- "Interaction Z score vs. Rank for" + } else { + rank_var <- paste0("Rank_", var) + zscore_var <- paste0("Avg_Zscore_", var) + plot_title_prefix <- "Average Z score vs. Rank for" + } - # Single config with all sd bands - configs[[length(configs) + 1]] <- list( - df = df, - x_var = rank_var, - y_var = zscore_var, - plot_type = "scatter", - title = paste(plot_title_prefix, var, "Rank Plot"), - sd_band = c(1, 2, 3), # Pass all sd bands at once - enhancer_label = list( - x = nrow(df) / 2, y = 10, - label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ])) # Example for the first SD band - ), - suppressor_label = list( - x = nrow(df) / 2, y = -10, - label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ])) - ), - shape = 3, - size = 0.1 - ) + enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ]) + suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ]) - # Non-annotated version - configs[[length(configs) + 1]] <- list( - df = df, - x_var = rank_var, - y_var = zscore_var, - plot_type = "scatter", - title = paste(plot_title_prefix, var, "Rank Plot No Annotations"), - sd_band = c(1, 2, 3), - enhancer_label = NULL, - suppressor_label = NULL, - shape = 3, - size = 0.1 - ) + # Create Annotated Plot Configuration for rank variables + config_annotated <- list( + df = df, + x_var = rank_var, + y_var = zscore_var, + plot_type = "scatter", + title = paste(plot_title_prefix, var, "Rank Plot"), + sd_band = c(1, 2, 3), + enhancer_label = list( + x = nrow(df) / 2, + y = 10, + label = paste("Deletion Enhancers =", enhancers_count) + ), + suppressor_label = list( + x = nrow(df) / 2, + y = -10, + label = paste("Deletion Suppressors =", suppressors_count) + ), + shape = 3, + size = 0.1 + ) - return(configs) + # Create Non-Annotated Plot Configuration for rank variables + config_no_annotated <- list( + df = df, + x_var = rank_var, + y_var = zscore_var, + plot_type = "scatter", + title = paste(plot_title_prefix, var, "Rank Plot No Annotations"), + sd_band = c(1, 2, 3), + enhancer_label = NULL, + suppressor_label = NULL, + shape = 3, + size = 0.1 + ) + + # Append configurations to the list + configs[[length(configs) + 1]] <- config_annotated + configs[[length(configs) + 1]] <- config_no_annotated + } + + return(list( + adjusted_df = df, + plot_configs = configs + )) } generate_correlation_plot_configs <- function(df, variables) { @@ -1214,24 +1227,41 @@ main <- function() { write.csv(suppressors_lm_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) - # TODO needs explanation - message("Adjusting and ranking interactions scores") - zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions, variables = interaction_vars) - message("Generating rank plots") - rank_plot_configs <- c( - generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"), - generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K") - ) - generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", - plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) + rank_vars <- c("L", "K") - rank_lm_plot_config <- c( - generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), - generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE) + # Generate rank plot configurations and adjust the dataframe + rank_plot_results <- generate_rank_plot_configs( + df = zscores_interactions, + interaction_vars = interaction_vars, + rank_vars = rank_vars, + is_lm = FALSE + ) + zscores_interactions_adjusted <- rank_plot_results$adjusted_df + + # Generate and save standard rank plots + generate_and_save_plots( + output_dir = out_dir, + file_name = "RankPlots", + plot_configs = rank_plot_results$plot_configs, + grid_layout = list(ncol = 3, nrow = 2) + ) + + # Generate rank plot configurations for lm variables + rank_lm_plot_results <- generate_rank_plot_configs( + df = zscores_interactions_adjusted, + interaction_vars = interaction_vars, + rank_vars = rank_vars, + is_lm = TRUE + ) + + # Generate and save lm rank plots + generate_and_save_plots( + output_dir = out_dir, + file_name = "RankPlots_lm", + plot_configs = rank_lm_plot_results$plot_configs, + grid_layout = list(ncol = 3, nrow = 2) ) - generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", - plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2)) # Formerly X_NArm zscores_interactions_filtered <- zscores_interactions %>%