瀏覽代碼

Improve the interactions df

Bryan Roessler 8 月之前
父節點
當前提交
7c0ed3eda1
共有 2 個文件被更改,包括 48 次插入52 次删除
  1. 45 49
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R
  2. 3 3
      qhtcp-workflow/qhtcp-workflow

+ 45 - 49
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -187,7 +187,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
   # Calculate total concentration variables
   total_conc_num <- length(unique(df$conc_num))
-  num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1
 
   # Pull the background means and standard deviations from zero concentration
   bg_means <- list(
@@ -204,6 +203,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
     AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
   )
 
+  # Grab these values from the original df before mutating the new stats
   stats <- df %>%
     mutate(
       WT_L = mean_L,
@@ -214,9 +214,11 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       WT_sd_K = sd_K,
       WT_sd_r = sd_r,
       WT_sd_AUC = sd_AUC
-    ) %>%
+    )
+
+  stats <- stats %>%
     group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
-    mutate(
+    summarise(
       N = sum(!is.na(L)),
       NG = sum(NG, na.rm = TRUE),
       DB = sum(DB, na.rm = TRUE),
@@ -229,8 +231,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
         sd = ~sd(., na.rm = TRUE),
         se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
       ), .names = "{.fn}_{.col}")
-    ) %>%
-    ungroup()
+    )
 
   stats <- stats %>%
     group_by(OrfRep, Gene, num) %>%
@@ -274,51 +275,61 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     )
 
-  stats <- stats %>%
-    mutate(
+  # Calculate linear models
+  lm_L <- lm(Delta_L ~ conc_num_factor, data = stats)
+  lm_K <- lm(Delta_K ~ conc_num_factor, data = stats)
+  lm_r <- lm(Delta_r ~ conc_num_factor, data = stats)
+  lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = stats)
+
+  interactions <- stats %>%
+    transmute(
+      OrfRep = first(OrfRep),
+      Gene = first(Gene),
+      Raw_Shift_L = first(Raw_Shift_L),
+      Raw_Shift_K = first(Raw_Shift_K),
+      Raw_Shift_r = first(Raw_Shift_r),
+      Raw_Shift_AUC = first(Raw_Shift_AUC),
+      Z_Shift_L = first(Z_Shift_L),
+      Z_Shift_K = first(Z_Shift_K),
+      Z_Shift_r = first(Z_Shift_r),
+      Z_Shift_AUC = first(Z_Shift_AUC),
       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
-      Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
-    )
-
-  # Calculate linear models and store in own df for now
-  lms <- stats %>%
-    reframe(
-      L = lm(Delta_L ~ conc_num_factor),
-      K = lm(Delta_K ~ conc_num_factor),
-      r = lm(Delta_r ~ conc_num_factor),
-      AUC = lm(Delta_AUC ~ conc_num_factor)
+      Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
+      lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
+      lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
+      lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
+      lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
+      R_Squared_L = summary(lm_L)$r.squared,
+      R_Squared_K = summary(lm_K)$r.squared,
+      R_Squared_r = summary(lm_r)$r.squared,
+      R_Squared_AUC = summary(lm_AUC)$r.squared,
+      NG = sum(NG, na.rm = TRUE),
+      DB = sum(DB, na.rm = TRUE),
+      SM = sum(SM, na.rm = TRUE)
     )
 
-  stats <- stats %>%
+  num_non_removed_concs <- total_conc_num - sum(stats$DB, na.rm = TRUE) - 1
+  
+  interactions <- interactions %>%
     mutate(
       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
       Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
       Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
       Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
-      lm_Score_L = max_conc * coef(lms$L)[2] + coef(lms$L)[1],
-      lm_Score_K = max_conc * coef(lms$K)[2] + coef(lms$K)[1],
-      lm_Score_r = max_conc * coef(lms$r)[2] + coef(lms$r)[1],
-      lm_Score_AUC = max_conc * coef(lms$AUC)[2] + coef(lms$AUC)[1],
-      R_Squared_L = summary(lms$L)$r.squared,
-      R_Squared_K = summary(lms$K)$r.squared,
-      R_Squared_r = summary(lms$r)$r.squared,
-      R_Squared_AUC = summary(lms$AUC)$r.squared
-    )
-
-  stats <- stats %>%
-    mutate(
       Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
       Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
       Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
       Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
-    )
+    ) %>%
+    arrange(desc(Z_lm_L)) %>%
+    arrange(desc(NG))
 
   # Declare column order for output
   calculations <- stats %>%
     select(
-      "OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
+      "OrfRep", "Gene", "conc_num", "conc_num_factor", "N",
       "mean_L", "mean_K", "mean_r", "mean_AUC",
       "median_L", "median_K", "median_r", "median_AUC",
       "sd_L", "sd_K", "sd_r", "sd_AUC",
@@ -332,23 +343,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
       "NG", "SM", "DB")
     
-  interactions <- stats %>%
-    select(
-      "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
-      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
-      "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
-      "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
-      "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
-      "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
-      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
-      "NG", "SM", "DB") %>%
-    arrange(desc(lm_Score_L)) %>%
-    arrange(desc(NG))
-
-  print(df, n = 1)
-  print(calculations, n = 1)
-  df <- df %>% select(-any_of(setdiff(names(calculations), group_vars)))
-  df <- left_join(df, calculations, by = group_vars)
+  df <- df %>% select(-any_of(setdiff(names(calculations), OrfRep, Gene, num, conc_num, conc_num_factor)))
+  df <- left_join(df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
   # df <- df %>% select(-any_of(setdiff(names(interactions), group_vars)))
   # df <- left_join(df, interactions, by = group_vars)
 

+ 3 - 3
qhtcp-workflow/qhtcp-workflow

@@ -681,10 +681,10 @@ install_dependencies() {
   echo "If you do not have sudo access, you may want to use toolbox"
   case "$(uname -s)" in
     Linux*|CYGWIN*|MINGW*)
-      if hash dnf &>/dev/null; then
+      if command -v dnf &>/dev/null; then
         ask "Detected Linux RPM platform, continue?" || return 1
         sudo dnf install "${depends_rpm[@]}"
-      elif hash apt &>/dev/null; then
+      elif command -v apt &>/dev/null; then
         ask "Detected Linux DEB platform, continue?" || return 1
         sudo apt install "${depends_deb[@]}"
       else
@@ -753,7 +753,7 @@ install_dependencies() {
   fi
   echo ""
 
-  hash "$MATLAB" &>/dev/null || echo "You will also need MATLAB installed for GUI modules"
+  command -v "$MATLAB" &>/dev/null || echo "You will also need MATLAB installed for GUI modules"
 }