Browse Source

Fix dataframe joining dimensionality

Bryan Roessler 6 months ago
parent
commit
5a3fcec55f
1 changed file with 42 additions and 23 deletions
  1. 42 23
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 42 - 23
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -212,7 +212,7 @@ calculate_summary_stats <- function(df, variables, group_vars) {
 }
 
 calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) {
-  
+
   # Calculate total concentration variables
   total_conc_num <- length(unique(df$conc_num))
   
@@ -290,7 +290,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
     }) %>%
     ungroup()
 
-  # Continue with the rest of the function as before
+  # Summary statistics for lm scores
   lm_means_sds <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
     summarise(
@@ -314,7 +314,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
       Z_lm_r = (lm_Score_r - mean_lm_r) / sd_lm_r,
       Z_lm_AUC = (lm_Score_AUC - mean_lm_AUC) / sd_lm_AUC
     )
-  
+
   # Build summary stats (interactions)
   interactions <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
@@ -340,32 +340,51 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
       Z_Shift_L = first(Z_Shift_L),
       Z_Shift_K = first(Z_Shift_K),
       Z_Shift_r = first(Z_Shift_r),
-      Z_Shift_AUC = first(Z_Shift_AUC)
+      Z_Shift_AUC = first(Z_Shift_AUC),
       
-    ) %>%
-    arrange(desc(Z_lm_L), desc(NG)) %>%
-    ungroup() %>%
-    mutate(
-      Overlap = case_when(
-        Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
-        Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
-        Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only",
-        Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
-        Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only",
-        Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
-        Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
-        Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
-        TRUE ~ "No Effect"
-      )
+      # NG, DB, SM values
+      NG = first(NG),
+      DB = first(DB),
+      SM = first(SM)
+    )
+
+  # Create the final calculations and interactions data frames, keeping only the columns required for CSV output
+  calculations_df <- calculations %>%
+    select(
+      all_of(group_vars),
+      conc_num, conc_num_factor, conc_num_factor_factor,
+      N, NG, DB, SM,
+      mean_L, median_L, sd_L, se_L,
+      mean_K, median_K, sd_K, se_K,
+      mean_r, median_r, sd_r, se_r,
+      mean_AUC, median_AUC, sd_AUC, se_AUC,
+      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
+      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
+      WT_L, WT_K, WT_r, WT_AUC,
+      WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
+      Exp_L, Exp_K, Exp_r, Exp_AUC,
+      Delta_L, Delta_K, Delta_r, Delta_AUC,
+      Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
     )
 
-  # Return full data and correlation stats
+  interactions_df <- interactions %>%
+    select(
+      all_of(group_vars),
+      NG, DB, SM,
+      Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
+      Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
+      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
+      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
+    )
+
+  # Use left_join to avoid dimension mismatch issues
   full_data <- calculations %>%
     left_join(interactions, by = group_vars)
-  
+
+  # Return full_data and the two required dataframes (calculations and interactions)
   return(list(
-    calculations = calculations,
-    interactions = interactions,
+    calculations = calculations_df,
+    interactions = interactions_df,
     full_data = full_data
   ))
 }