Bladeren bron

Return NULL from lm instead of refactor

Bryan Roessler 8 maanden geleden
bovenliggende
commit
30abced9ff
1 gewijzigde bestanden met toevoegingen van 50 en 46 verwijderingen
  1. 50 46
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 50 - 46
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -204,7 +204,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
     AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
   )
 
-  # Main statistics and shifts
   stats <- df %>%
     mutate(
       WT_L = mean_L,
@@ -236,14 +235,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
   stats <- stats %>%
     group_by(across(all_of(group_vars))) %>%
     mutate(
-      Raw_Shift_L = mean_L - bg_means$L,
-      Raw_Shift_K = mean_K - bg_means$K,
-      Raw_Shift_r = mean_r - bg_means$r,
-      Raw_Shift_AUC = mean_AUC - bg_means$AUC,
-      Z_Shift_L = Raw_Shift_L / bg_sd$L,
-      Z_Shift_K = Raw_Shift_K / bg_sd$K,
-      Z_Shift_r = Raw_Shift_r / bg_sd$r,
-      Z_Shift_AUC = Raw_Shift_AUC / bg_sd$AUC
+      Raw_Shift_L = mean_L[[1]] - bg_means$L,
+      Raw_Shift_K = mean_K[[1]] - bg_means$K,
+      Raw_Shift_r = mean_r[[1]] - bg_means$r,
+      Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
+      Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L,
+      Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K,
+      Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r,
+      Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC
     )
 
   stats <- stats %>%
@@ -256,7 +255,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Delta_K = mean_K - Exp_K,
       Delta_r = mean_r - Exp_r,
       Delta_AUC = mean_AUC - Exp_AUC
-    ) %>%
+    )
+
+  stats <- stats %>%
     mutate(
       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
@@ -265,7 +266,15 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
     )
 
-  # Create linear models with proper error handling for insufficient data
+  stats <- stats %>%
+    mutate(
+      Zscore_L = Delta_L / WT_sd_L,
+      Zscore_K = Delta_K / WT_sd_K,
+      Zscore_r = Delta_r / WT_sd_r,
+      Zscore_AUC = Delta_AUC / WT_sd_AUC
+    )
+
+  # Create linear models with error handling for missing/insufficient data
   lms <- stats %>%
     summarise(
       lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL,
@@ -274,25 +283,23 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL
     )
 
-  # Join models and calculate interaction scores
   stats <- stats %>%
     left_join(lms, by = group_vars) %>%
     mutate(
-      lm_Score_L = sapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_K = sapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_r = sapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      R_Squared_L = sapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_K = sapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_r = sapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      lm_Score_L = sapply(lm_L, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_K = sapply(lm_K, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_r = sapply(lm_r, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      R_Squared_L = sapply(lm_L, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
+      R_Squared_K = sapply(lm_K, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
+      R_Squared_r = sapply(lm_r, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
+      R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
       Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
     )
 
-  # Calculate Z-scores
   stats <- stats %>%
     mutate(
       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
@@ -305,34 +312,32 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
     )
 
-  # Final output preparation
+  # Declare column order for output
   calculations <- stats %>%
-    select(
-      "OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
-      "mean_L", "mean_K", "mean_r", "mean_AUC",
-      "median_L", "median_K", "median_r", "median_AUC",
-      "sd_L", "sd_K", "sd_r", "sd_AUC",
-      "se_L", "se_K", "se_r", "se_AUC",
-      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
-      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
-      "WT_L", "WT_K", "WT_r", "WT_AUC",
-      "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
-      "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
-      "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
-      "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
-      "NG", "SM", "DB") %>%
+    select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
+           "mean_L", "mean_K", "mean_r", "mean_AUC",
+           "median_L", "median_K", "median_r", "median_AUC",
+           "sd_L", "sd_K", "sd_r", "sd_AUC",
+           "se_L", "se_K", "se_r", "se_AUC",
+           "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
+           "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+           "WT_L", "WT_K", "WT_r", "WT_AUC",
+           "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
+           "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
+           "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
+           "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
+           "NG", "SM", "DB") %>%
     ungroup()
 
   interactions <- stats %>%
-    select(
-      "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
-      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
-      "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
-      "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
-      "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
-      "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
-      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
-      "NG", "SM", "DB") %>%
+    select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
+           "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+           "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
+           "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
+           "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
+           "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
+           "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
+           "NG", "SM", "DB") %>%
     arrange(desc(lm_Score_L)) %>%
     arrange(desc(NG)) %>%
     ungroup()
@@ -340,7 +345,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
   return(list(calculations = calculations, interactions = interactions))
 }
 
-
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
 
   message("Generating html and pdf plots for: ", file_name, ".pdf|html")