diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index db2b3d36..aa93e1e0 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -709,23 +709,21 @@ generate_interaction_plot_configs <- function(df, variables) { generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { - # Adjust missing values and compute ranks for each interaction variable - if (adjust) { - for (var in interaction_vars) { - avg_zscore_col <- paste0("Avg_Zscore_", var) - z_lm_col <- paste0("Z_lm_", var) - rank_col <- paste0("Rank_", var) - rank_lm_col <- paste0("Rank_lm_", var) - + for (var in interaction_vars) { + avg_zscore_col <- paste0("Avg_Zscore_", var) + z_lm_col <- paste0("Z_lm_", var) + rank_col <- paste0("Rank_", var) + rank_lm_col <- paste0("Rank_lm_", var) + + if (adjust) { # Replace NA with 0.001 for interaction variables df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) - - # Compute ranks for interaction variables - df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") - df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") - } + + # Compute ranks for interaction variables + df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") + df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") } # Initialize list to store plot configurations @@ -782,11 +780,8 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", ) } } - - return(list( - adjusted_df = df, - plot_configs = configs - )) + + return(configs) } generate_correlation_plot_configs <- function(df, variables) { @@ -1230,7 +1225,7 @@ main <- function() { interaction_vars = interaction_vars, is_lm = FALSE, adjust = TRUE - )$plot_configs + ) # Save the generated rank plots for L and K generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", @@ -1242,27 +1237,26 @@ main <- function() { interaction_vars = interaction_vars, is_lm = TRUE, adjust = TRUE - )$plot_configs + ) # Save the linear model based rank plots for L and K generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) - + message("Filtering and regenerating rank plots") # Filter rows where either Z_lm_L or Avg_Zscore_L is not NA + # Formerly X_NArm zscores_interactions_filtered <- zscores_interactions %>% group_by(across(all_of(orf_group_vars))) %>% filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>% - ungroup() - - # Final filtered correlation calculations and Overlap column - zscores_interactions_filtered <- zscores_interactions_filtered %>% + ungroup() %>% rowwise() %>% mutate( lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA, lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA, lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA, lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA, + Overlap = case_when( Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both",