Quellcode durchsuchen

Specify output columns for interaction scores

Bryan Roessler vor 7 Monaten
Ursprung
Commit
ba8e4ced3e
1 geänderte Dateien mit 61 neuen und 41 gelöschten Zeilen
  1. 61 41
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 61 - 41
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -201,7 +201,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
     AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
   )
 
-  interaction_scores <- df %>%
+  calculations <- df %>%
     mutate(
       WT_L = df$mean_L,
       WT_K = df$mean_K,
@@ -229,20 +229,20 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       ) %>%
     ungroup()
 
-  interaction_scores <- interaction_scores %>%
+  calculations <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
       mutate(
         Raw_Shift_L = mean_L[[1]] - bg_means$L,
         Raw_Shift_K = mean_K[[1]] - bg_means$K,
         Raw_Shift_r = mean_r[[1]] - bg_means$r,
         Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
-        Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
-        Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
-        Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
-        Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
+        Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L,
+        Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K,
+        Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r,
+        Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC
       )
 
-  interaction_scores <- interaction_scores %>%
+  calculations <- calculations %>%
     mutate(
       Exp_L = WT_L + Raw_Shift_L,
       Delta_L = mean_L - Exp_L,
@@ -254,7 +254,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Delta_AUC = mean_AUC - Exp_AUC
     )
 
-  interaction_scores <- interaction_scores %>%
+  calculations <- calculations %>%
     mutate(
       Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
       Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
@@ -263,8 +263,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
     )
 
-  # Calculate linear models and interaction scores
-  interaction_scores <- interaction_scores %>%
+  interactions <- calculations %>%
     mutate(
       lm_L = lm(Delta_L ~ conc_num_factor),
       lm_K = lm(Delta_K ~ conc_num_factor),
@@ -276,46 +275,63 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
       Zscore_AUC = Delta_AUC / WT_sd_AUC
     )
 
-  interaction_scores <- interaction_scores %>%
+  interactions <- interactions %>%
     mutate(
+      lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
+      lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
+      lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
+      lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
+      r_squared_L = summary(lm_L)$r.squared,
+      r_squared_K = summary(lm_K)$r.squared,
+      r_squared_r = summary(lm_r)$r.squared,
+      r_squared_AUC = summary(lm_AUC)$r.squared,
       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
       Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
     )
 
-  interaction_scores_all <- interaction_scores %>%
+  interactions <- interactions %>%
     mutate(
       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
       Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
       Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
       Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
-      lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
-      lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
-      lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
-      lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
-      r_squared_L = summary(lm_L)$r.squared,
-      r_squared_K = summary(lm_K)$r.squared,
-      r_squared_r = summary(lm_r)$r.squared,
-      r_squared_AUC = summary(lm_AUC)$r.squared
-    )
-
-  # Calculate Z_lm for each variable
-  interaction_scores_all <- interaction_scores_all %>%
-    mutate(
       Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
       Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
       Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
       Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
     )
 
+  calculations <- calculations %>%
+    select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
+      "mean_L", "mean_K", "mean_r", "mean_AUC",
+      "median_L", "median_K", "median_r", "median_AUC",
+      "sd_L", "sd_K", "sd_r", "sd_AUC",
+      "se_L", "se_K", "se_r", "se_AUC",
+      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
+      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+      "WT_L", "WT_K", "WT_r", "WT_AUC", "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
+      "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
+      "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
+      "NG", "SM", "DB") %>%
+    ungroup()
+
   # Keep only the output columns, then sort by lm_Score_L and NG (both descending)
-  interaction_scores_all <- interaction_scores_all %>%
+  interactions <- interactions %>%
+    select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K",  "Raw_Shift_AUC", "Raw_Shift_r",
+      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+      "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
+      "r_squared_L", "r_squared_K", "r_squared_r", "r_squared_AUC",
+      "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC",
+      "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
+      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
+      "NG", "SM", "DB") %>%
     arrange(desc(lm_Score_L)) %>%
     arrange(desc(NG)) %>%
     ungroup()
 
-  return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
+  return(list(calculations = calculations, interactions = interactions))
 }
 
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
@@ -674,6 +690,7 @@ main <- function() {
 
     # Print quality control graphs before removing data due to contamination and
     # adjusting missing data to max theoretical values
+    message("Generating QC plot configurations")
     l_vs_k_plots <- list(
       list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
         title = "Raw L vs K before QC",
@@ -864,10 +881,10 @@ main <- function() {
       print(head(deletion_strains))
       deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
       
-      zscores_calculations_reference <- reference_results$zscores_calculations
-      zscores_interactions_reference <- reference_results$zscores_interactions
-      zscores_calculations <- deletion_results$zscores_calculations
-      zscores_interactions <- deletion_results$zscores_interactions
+      zscores_calculations_reference <- reference_results$calculations
+      zscores_interactions_reference <- reference_results$interactions
+      zscores_calculations <- deletion_results$calculations
+      zscores_interactions <- deletion_results$interactions
       
       # Writing Z-Scores to file
       write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
@@ -946,9 +963,20 @@ main <- function() {
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
         plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
       
-
+      interaction_scores_filtered
       
 
+
+
+      # lm_summaries <- lapply(lm_list, summary)
+      # correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
+      # generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
+    })
+  })
+}
+main()
+
+
       # # Correlation plots
       # lm_list <- list(
       #   lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
@@ -957,12 +985,4 @@ main <- function() {
       #   lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
       #   lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
       #   lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
-      # )
-
-      lm_summaries <- lapply(lm_list, summary)
-      correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
-      generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
-    })
-  })
-}
-main()
+      # )