瀏覽代碼

Put bg_stats into its own df to be reused

Bryan Roessler 7 月之前
父節點
當前提交
81174db065
共有 1 個文件被更改,包括 28 次插入和 16 次删除
  1. 28 16
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 28 - 16
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -193,15 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 
-calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
+calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c("L", "K", "r", "AUC"),
   group_vars = c("OrfRep", "Gene", "num")) {
 
   # Calculate total concentration variables
   total_conc_num <- length(unique(df$conc_num))
 
-
-
-  calculations <- calculations %>%
+  calculations <- df %>%
     group_by(OrfRep, Gene, num) %>%
     mutate(
       NG = sum(NG, na.rm = TRUE),
@@ -210,14 +208,14 @@ calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "
       num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
 
       # Store the background data
-      WT_L = bg_means$L,
-      WT_K = bg_means$K,
-      WT_r = bg_means$r,
-      WT_AUC = bg_means$AUC,
-      WT_sd_L = bg_sd$L,
-      WT_sd_K = bg_sd$K,
-      WT_sd_r = bg_sd$r,
-      WT_sd_AUC = bg_sd$AUC,
+      WT_L = bg_stats$WT_L,
+      WT_K = bg_stats$WT_K,
+      WT_r = bg_stats$WT_r,
+      WT_AUC = bg_stats$WT_AUC,
+      WT_sd_L = bg_stats$WT_sd_L,
+      WT_sd_K = bg_stats$WT_sd_K,
+      WT_sd_r = bg_stats$WT_sd_r,
+      WT_sd_AUC = bg_stats$WT_sd_AUC,
       Raw_Shift_L = first(mean_L) - bg_means$L,
       Raw_Shift_K = first(mean_K) - bg_means$K,
       Raw_Shift_r = first(mean_r) - bg_means$r,
@@ -1050,7 +1048,7 @@ main <- function() {
     df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
     
     # Save some constants
-    max_conc <- max(df$conc_num_factor)
+    max_conc <- max(as.numeric(df$conc_num_factor))
     l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
     k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
 
@@ -1072,6 +1070,20 @@ main <- function() {
     write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
     # df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
 
+    # Pull the background means and standard deviations from zero concentration
+    bg_stats <- df_na_stats %>%
+      filter(conc_num == 0) %>%
+      summarise(
+        WT_L = first(mean_L),
+        WT_K = first(mean_K),
+        WT_r = first(mean_r),
+        WT_AUC = first(mean_AUC),
+        WT_sd_L = first(sd_L),
+        WT_sd_K = first(sd_K),
+        WT_sd_r = first(sd_r),
+        WT_sd_AUC = first(sd_AUC)
+      )
+
     message("Calculating summary statistics after quality control excluding zero values")
     ss <- calculate_summary_stats(
       df = df_no_zeros,
@@ -1308,11 +1320,11 @@ main <- function() {
 
       message("Calculating reference strain interaction scores")
       df_reference_stats <- calculate_summary_stats(
-        df = refrence_strain,
+        df = reference_strain,
         variables = interaction_vars,
         group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
         )$df_with_stats
-      reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num"))
+      reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
       zscores_calculations_reference <- reference_results$calculations
       zscores_interactions_reference <- reference_results$interactions
       zscores_interactions_reference_joined <- reference_results$interactions_joined
@@ -1323,7 +1335,7 @@ main <- function() {
         variables = interaction_vars,
         group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
         )$df_with_stats
-      deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep"))
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep"))
       zscores_calculations <- deletion_results$calculations
       zscores_interactions <- deletion_results$interactions
       zscores_interactions_joined <- deletion_results$interactions_joined