浏览代码

Fix Z_lm calculations

Bryan Roessler 7 月之前
父节点
当前提交
57ff7698a3
共有 1 个文件被更改,包括 25 次插入10 次删除
  1. 25 10
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 25 - 10
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -191,14 +191,14 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 
-calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c("L", "K", "r", "AUC"),
+calculate_interaction_scores <- function(df, max_conc, bg_stats,
   group_vars = c("OrfRep", "Gene", "num")) {
 
   # Calculate total concentration variables
   total_conc_num <- length(unique(df$conc_num))
 
   calculations <- df %>%
-    group_by(OrfRep, Gene, num) %>%
+    group_by(across(all_of(group_vars))) %>%
     mutate(
       NG = sum(NG, na.rm = TRUE),
       DB = sum(DB, na.rm = TRUE),
@@ -260,19 +260,34 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c("
       R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared),
       R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared),
       R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared),
-      R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared),
-
-      # Calculate Z_lm_* Scores
-      Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
-      Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
-      Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
-      Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
+      R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared)
     ) %>%
     ungroup()
 
+  # Calculate overall mean and SD for lm_Score_* variables
+  lm_means_sds <- calculations %>%
+    summarise(
+      lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
+      lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
+      lm_mean_K = mean(lm_Score_K, na.rm = TRUE),
+      lm_sd_K = sd(lm_Score_K, na.rm = TRUE),
+      lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
+      lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
+      lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
+      lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
+    )
+
+  calculations <- calculations %>%
+  mutate(
+    Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L,
+    Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K,
+    Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r,
+    Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC
+  )
+
   # Summarize some of the stats
   interactions <- calculations %>%
-    group_by(OrfRep, Gene, num) %>%
+    group_by(across(all_of(group_vars))) %>%
     mutate(
       # Calculate raw shifts
       Raw_Shift_L = first(Raw_Shift_L),