10 月之前 · 30abced9ff
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -204,7 +204,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				     AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
			
 
				   )
			
 
				 
			
 
				-  # Main statistics and shifts
			
 
				   stats <- df %>%
			
 
				     mutate(
			
 
				       WT_L = mean_L,
			
@@ -236,14 +235,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				   stats <- stats %>%
			
 
				     group_by(across(all_of(group_vars))) %>%
			
 
				     mutate(
			
 
				-      Raw_Shift_L = mean_L - bg_means$L,
			
 
				-      Raw_Shift_K = mean_K - bg_means$K,
			
 
				-      Raw_Shift_r = mean_r - bg_means$r,
			
 
				-      Raw_Shift_AUC = mean_AUC - bg_means$AUC,
			
 
				-      Z_Shift_L = Raw_Shift_L / bg_sd$L,
			
 
				-      Z_Shift_K = Raw_Shift_K / bg_sd$K,
			
 
				-      Z_Shift_r = Raw_Shift_r / bg_sd$r,
			
 
				-      Z_Shift_AUC = Raw_Shift_AUC / bg_sd$AUC
			
 
				+      Raw_Shift_L = mean_L[[1]] - bg_means$L,
			
 
				+      Raw_Shift_K = mean_K[[1]] - bg_means$K,
			
 
				+      Raw_Shift_r = mean_r[[1]] - bg_means$r,
			
 
				+      Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
			
 
				+      Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L,
			
 
				+      Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K,
			
 
				+      Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r,
			
 
				+      Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC
			
 
				     )
			
 
				 
			
 
				   stats <- stats %>%
			
@@ -256,7 +255,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				       Delta_K = mean_K - Exp_K,
			
 
				       Delta_r = mean_r - Exp_r,
			
 
				       Delta_AUC = mean_AUC - Exp_AUC
			
 
				-    ) %>%
			
 
				+    )
			
 
				+
			
 
				+  stats <- stats %>%
			
 
				     mutate(
			
 
				       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
			
 
				       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
			
@@ -265,7 +266,15 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
			
 
				     )
			
 
				 
			
 
				-  # Create linear models with proper error handling for insufficient data
			
 
				+  stats <- stats %>%
			
 
				+    mutate(
			
 
				+      Zscore_L = Delta_L / WT_sd_L,
			
 
				+      Zscore_K = Delta_K / WT_sd_K,
			
 
				+      Zscore_r = Delta_r / WT_sd_r,
			
 
				+      Zscore_AUC = Delta_AUC / WT_sd_AUC
			
 
				+    )
			
 
				+
			
 
				+  # Create linear models with error handling for missing/insufficient data
			
 
				   lms <- stats %>%
			
 
				     summarise(
			
 
				       lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL,
			
@@ -274,25 +283,23 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				       lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL
			
 
				     )
			
 
				 
			
 
				-  # Join models and calculate interaction scores
			
 
				   stats <- stats %>%
			
 
				     left_join(lms, by = group_vars) %>%
			
 
				     mutate(
			
 
				-      lm_Score_L = sapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_K = sapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_r = sapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      R_Squared_L = sapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_K = sapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_r = sapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				+      lm_Score_L = sapply(lm_L, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				+      lm_Score_K = sapply(lm_K, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				+      lm_Score_r = sapply(lm_r, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				+      lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				+      R_Squared_L = sapply(lm_L, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
			
 
				+      R_Squared_K = sapply(lm_K, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
			
 
				+      R_Squared_r = sapply(lm_r, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
			
 
				+      R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) summary(model)$r.squared else NA),
			
 
				       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
			
 
				       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
			
 
				       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
			
 
				       Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
			
 
				     )
			
 
				 
			
 
				-  # Calculate Z-scores
			
 
				   stats <- stats %>%
			
 
				     mutate(
			
 
				       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
			
@@ -305,34 +312,32 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				       Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
			
 
				     )
			
 
				 
			
 
				-  # Final output preparation
			
 
				+  # Declare column order for output
			
 
				   calculations <- stats %>%
			
 
				-    select(
			
 
				-      "OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
			
 
				-      "mean_L", "mean_K", "mean_r", "mean_AUC",
			
 
				-      "median_L", "median_K", "median_r", "median_AUC",
			
 
				-      "sd_L", "sd_K", "sd_r", "sd_AUC",
			
 
				-      "se_L", "se_K", "se_r", "se_AUC",
			
 
				-      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
			
 
				-      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
			
 
				-      "WT_L", "WT_K", "WT_r", "WT_AUC",
			
 
				-      "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
			
 
				-      "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
			
 
				-      "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
			
 
				-      "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
			
 
				-      "NG", "SM", "DB") %>%
			
 
				+    select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
			
 
				+           "mean_L", "mean_K", "mean_r", "mean_AUC",
			
 
				+           "median_L", "median_K", "median_r", "median_AUC",
			
 
				+           "sd_L", "sd_K", "sd_r", "sd_AUC",
			
 
				+           "se_L", "se_K", "se_r", "se_AUC",
			
 
				+           "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
			
 
				+           "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
			
 
				+           "WT_L", "WT_K", "WT_r", "WT_AUC",
			
 
				+           "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
			
 
				+           "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
			
 
				+           "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
			
 
				+           "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
			
 
				+           "NG", "SM", "DB") %>%
			
 
				     ungroup()
			
 
				 
			
 
				   interactions <- stats %>%
			
 
				-    select(
			
 
				-      "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
			
 
				-      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
			
 
				-      "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
			
 
				-      "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
			
 
				-      "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
			
 
				-      "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
			
 
				-      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
			
 
				-      "NG", "SM", "DB") %>%
			
 
				+    select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
			
 
				+           "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
			
 
				+           "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
			
 
				+           "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
			
 
				+           "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
			
 
				+           "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
			
 
				+           "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
			
 
				+           "NG", "SM", "DB") %>%
			
 
				     arrange(desc(lm_Score_L)) %>%
			
 
				     arrange(desc(NG)) %>%
			
 
				     ungroup()
			
@@ -340,7 +345,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				   return(list(calculations = calculations, interactions = interactions))
			
 
				 }
			
 
				 
			
 
				-
			
 
				 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
			
 
				 
			
 
				   message("Generating html and pdf plots for: ", file_name, ".pdf|html")