Browse Source

Separate adjustments from ranks

Bryan Roessler 7 months ago
parent
commit
22236fef49
1 changed file with 19 additions and 25 deletions
  1. 19 25
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 19 - 25
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -709,23 +709,21 @@ generate_interaction_plot_configs <- function(df, variables) {
 
 generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
 
-  # Adjust missing values and compute ranks for each interaction variable
-  if (adjust) {
-    for (var in interaction_vars) {
-      avg_zscore_col <- paste0("Avg_Zscore_", var)
-      z_lm_col <- paste0("Z_lm_", var)
-      rank_col <- paste0("Rank_", var)
-      rank_lm_col <- paste0("Rank_lm_", var)
-      
+  for (var in interaction_vars) {
+    avg_zscore_col <- paste0("Avg_Zscore_", var)
+    z_lm_col <- paste0("Z_lm_", var)
+    rank_col <- paste0("Rank_", var)
+    rank_lm_col <- paste0("Rank_lm_", var)
+
+    if (adjust) {
       # Replace NA with 0.001 for interaction variables
       df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
       df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
-      
-      # Compute ranks for interaction variables
-      df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
-      df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
-
     }
+
+    # Compute ranks for interaction variables
+    df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
+    df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
   }
   
   # Initialize list to store plot configurations
@@ -782,11 +780,8 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L",
       )
     }
   }
-  
-  return(list(
-    adjusted_df = df,
-    plot_configs = configs
-  ))
+    
+  return(configs)
 }
 
 generate_correlation_plot_configs <- function(df, variables) {
@@ -1230,7 +1225,7 @@ main <- function() {
         interaction_vars = interaction_vars,
         is_lm = FALSE,
         adjust = TRUE
-      )$plot_configs
+      )
 
       # Save the generated rank plots for L and K
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
@@ -1242,27 +1237,26 @@ main <- function() {
         interaction_vars = interaction_vars,
         is_lm = TRUE,
         adjust = TRUE
-      )$plot_configs
+      )
 
       # Save the linear model based rank plots for L and K
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
         plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
-      
+
       message("Filtering and regenerating rank plots")
       # Filter rows where either Z_lm_L or Avg_Zscore_L is not NA
+      # Formerly X_NArm
       zscores_interactions_filtered <- zscores_interactions %>%
         group_by(across(all_of(orf_group_vars))) %>%
         filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
-        ungroup()
-
-      # Final filtered correlation calculations and Overlap column
-      zscores_interactions_filtered <- zscores_interactions_filtered %>%
+        ungroup() %>%
         rowwise() %>%
         mutate(
           lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
           lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
           lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
           lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA,
+
           Overlap = case_when(
             Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
             Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both",