Bladeren bron

Align df handling closer to original script

Bryan Roessler 7 maanden geleden
bovenliggende
commit
4741996694
1 gewijzigde bestanden met toevoegingen van 33 en 47 verwijderingen
  1. 33 47
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 33 - 47
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -193,31 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 
-calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC")) {
+calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
+  group_vars = c("OrfRep", "Gene", "num")) {
 
   # Calculate total concentration variables
   total_conc_num <- length(unique(df$conc_num))
 
-  # Pull the background means and standard deviations from zero concentration
-  bg_means <- list(
-    L = df %>% filter(conc_num == 0) %>% pull(mean_L) %>% first(),
-    K = df %>% filter(conc_num == 0) %>% pull(mean_K) %>% first(),
-    r = df %>% filter(conc_num == 0) %>% pull(mean_r) %>% first(),
-    AUC = df %>% filter(conc_num == 0) %>% pull(mean_AUC) %>% first()
-  )
-
-  bg_sd <- list(
-    L = df %>% filter(conc_num == 0) %>% pull(sd_L) %>% first(),
-    K = df %>% filter(conc_num == 0) %>% pull(sd_K) %>% first(),
-    r = df %>% filter(conc_num == 0) %>% pull(sd_r) %>% first(),
-    AUC = df %>% filter(conc_num == 0) %>% pull(sd_AUC) %>% first()
-  )
 
-  calculations <- calculate_summary_stats(
-    df = df,
-    variables = variables,
-    group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
-    )$df_with_stats
 
   calculations <- calculations %>%
     group_by(OrfRep, Gene, num) %>%
@@ -1064,22 +1046,21 @@ main <- function() {
 
     # Filter rows above delta background tolerance
     df_above_tolerance <- df %>% filter(DB == 1)
-    df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
-    df_no_zeros <- df_na %>% filter(L > 0)
+    df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) # formerly X
+    df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
     
     # Save some constants
-    max_conc <- max(df$conc_num)
+    max_conc <- max(df$conc_num_factor)
     l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
     k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
 
     message("Calculating summary statistics before quality control")
-    ss <- calculate_summary_stats(
+    df_stats <- calculate_summary_stats(
       df = df,
       variables = summary_vars,
-      group_vars = c("conc_num", "conc_num_factor"))
-    df_stats <- ss$df_with_stats
+      group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
     message("Filtering non-finite data")
-    df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
+    # df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
 
     message("Calculating summary statistics after quality control")
     ss <- calculate_summary_stats(
@@ -1089,7 +1070,7 @@ main <- function() {
     df_na_ss <- ss$summary_stats
     df_na_stats <- ss$df_with_stats
     write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
-    df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
+    # df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
 
     message("Calculating summary statistics after quality control excluding zero values")
     ss <- calculate_summary_stats(
@@ -1097,7 +1078,7 @@ main <- function() {
       variables = summary_vars,
       group_vars = c("conc_num", "conc_num_factor"))
     df_no_zeros_stats <- ss$df_with_stats
-    df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
+    # df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
 
     message("Filtering by 2SD of K")
     df_na_within_2sd_k <- df_na_stats %>%
@@ -1137,7 +1118,7 @@ main <- function() {
 
     frequency_delta_bg_plot_configs <- list(
       list(
-        df = df_filtered_stats,
+        df = df_stats,
         x_var = "delta_bg",
         y_var = NULL,
         plot_type = "density",
@@ -1148,7 +1129,7 @@ main <- function() {
         error_bar = FALSE,
         legend_position = "right"),
       list(
-        df = df_filtered_stats,
+        df = df_stats,
         x_var = "delta_bg",
         y_var = NULL,
         plot_type = "bar",
@@ -1185,27 +1166,27 @@ main <- function() {
 
     plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
-      df_before = df_filtered_stats,
-      df_after = df_na_filtered_stats,
+      df_before = df_stats,
+      df_after = df_na_stats,
     )
 
     plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
-      df_before = df_filtered_stats,
-      df_after = df_na_filtered_stats,
+      df_before = df_stats,
+      df_after = df_na_stats,
       plot_type = "box"
     )
 
     plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
       stages = c("after"),  # Only after QC
-      df_after = df_no_zeros_filtered_stats,
+      df_after = df_no_zeros_stats,
     )
 
     plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
       stages = c("after"),  # Only after QC
-      df_after = df_no_zeros_filtered_stats,
+      df_after = df_no_zeros_stats,
       plot_type = "box"
     )
 
@@ -1291,19 +1272,16 @@ main <- function() {
       message("Calculating summary statistics for background strain")
       ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
       summary_stats_bg <- ss_bg$summary_stats
-      # df_bg_stats <- ss_bg$df_with_stats
       write.csv(summary_stats_bg,
         file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
         row.names = FALSE)
       
       # Filter reference and deletion strains
-      # Formerly X2_RF (reference strains)
-      df_reference <- df_na_stats %>%
+      df_reference <- df_na_stats %>% # formerly X2_RF
         filter(OrfRep == strain) %>%
         mutate(SM = 0)
       
-      # Formerly X2 (deletion strains)
-      df_deletion <- df_na_stats %>%
+      df_deletion <- df_na_stats %>% # formerly X2
         filter(OrfRep != strain) %>%
         mutate(SM = 0)
 
@@ -1329,17 +1307,25 @@ main <- function() {
         ungroup()
 
       message("Calculating reference strain interaction scores")
-      reference_results <- calculate_interaction_scores(reference_strain, max_conc)
+      df_reference_stats <- calculate_summary_stats(
+        df = refrence_strain,
+        variables = interaction_vars,
+        group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
+        )$df_with_stats
+      reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num"))
       zscores_calculations_reference <- reference_results$calculations
       zscores_interactions_reference <- reference_results$interactions
-      # zscores_calculations_reference_joined <- reference_results$calculations_joined
       zscores_interactions_reference_joined <- reference_results$interactions_joined
 
       message("Calculating deletion strain(s) interactions scores")
-      deletion_results <- calculate_interaction_scores(deletion_strains, max_conc)
+      df_deletion_stats <- calculate_summary_stats(
+        df = deletion_strains,
+        variables = interaction_vars,
+        group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
+        )$df_with_stats
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep"))
       zscores_calculations <- deletion_results$calculations
       zscores_interactions <- deletion_results$interactions
-      # zscores_calculations_joined <- deletion_results$calculations_joined
       zscores_interactions_joined <- deletion_results$interactions_joined
 
       # Writing Z-Scores to file
@@ -1358,7 +1344,7 @@ main <- function() {
       generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
 
       # Define conditions for enhancers and suppressors
-      # TODO Add to study config file?
+      # TODO Add to study config?
       threshold <- 2
       enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
       suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold