Kaynağa Gözat

Correct grouping for deletion WT summarization

Bryan Roessler 6 ay önce
ebeveyn
işleme
a309130c39

+ 38 - 30
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -204,14 +204,22 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 
-calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
+calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
 
   max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
   total_conc_num <- length(unique(df$conc_num))
 
+  if (type == "reference") {
+    bg_group_vars <- c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
+    group_vars <- c("OrfRep", "Gene", "num", "Drug")
+  } else if (type == "deletion") {
+    bg_group_vars <- c("Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
+    group_vars <- c("OrfRep", "Gene", "Drug")
+  }
+
   # Calculate WT statistics from df_bg
   wt_stats <- df_bg %>%
-    group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>%
+    group_by(across(all_of(bg_group_vars))) %>%
     summarise(
       WT_L = mean(mean_L, na.rm = TRUE),
       WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -224,6 +232,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
     )
 
+  # Join WT statistics to df
+  df <- df %>%
+    left_join(wt_stats, by = bg_group_vars)
+
   # Compute mean values at zero concentration
   mean_zeroes <- df %>%
     filter(conc_num == 0) %>%
@@ -236,9 +248,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
     )
 
-  # Join WT statistics to df
   df <- df %>%
-    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
     left_join(mean_zeroes, by = c(group_vars))
     
   # Calculate Raw Shifts and Z Shifts
@@ -702,10 +712,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       total_spots <- grid_layout$nrow * grid_layout$ncol
       num_plots <- length(static_plots)
 
-      if (num_plots < total_spots) {
-        message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
-        static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
-      }
+      # if (num_plots < total_spots) {
+      #   message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
+      #   static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
+      # }
+
       grid.arrange(
         grobs = static_plots,
         ncol = grid_layout$ncol,
@@ -775,7 +786,6 @@ generate_scatter_plot <- function(plot, config) {
         linewidth = ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
       )
   }
-
   
   # Add SD Bands if specified
   if (!is.null(config$sd_band)) {
@@ -1538,25 +1548,22 @@ main <- function() {
           .groups = "drop"
         )
 
-      message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
-      df_reference_interaction_stats <- calculate_summary_stats(
-        df = df_reference,
-        variables = c("L", "K", "r", "AUC"),
-        group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
-        )$df_with_stats
+      # message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
+      # df_reference_interaction_stats <- calculate_summary_stats(
+      #   df = df_reference,
+      #   variables = c("L", "K", "r", "AUC"),
+      #   group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
+      #   )$df_with_stats
       
-      message("Calculating reference strain interaction scores")
-      reference_results <- calculate_interaction_scores(df_reference_interaction_stats,
-        df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
-      df_reference_calculations <- reference_results$calculations
-      df_reference_interactions <- reference_results$interactions
-      df_reference_interactions_joined <- reference_results$full_data
-      write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
-
-      message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
-      generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
+      # # message("Calculating reference strain interaction scores")
+      # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
+      # df_reference_interactions_joined <- reference_results$full_data
+      # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      # write.csv(reference_results$interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+
+      # # message("Generating reference interaction plots")
+      # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
+      # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2
@@ -1578,13 +1585,14 @@ main <- function() {
         )$df_with_stats
 
       message("Calculating deletion strain(s) interactions scores")
-      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
-      df_calculations <- deletion_results$calculations
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
       df_interactions <- deletion_results$interactions
       df_interactions_joined <- deletion_results$full_data
-      write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
+      write.csv(deletion_results$calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
 
+      print(df_interactions_joined, n = 20, width = 100000)
+
       message("Generating deletion interaction plots")
       deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)