Browse Source

Correct grouping for deletion WT summarization

Bryan Roessler 6 tháng trước
mục cha
commit
a309130c39

+ 38 - 30
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -204,14 +204,22 @@ calculate_summary_stats <- function(df, variables, group_vars) {
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
   return(list(summary_stats = summary_stats, df_with_stats = df_joined))
 }
 }
 
 
-calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
+calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
 
 
   max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
   max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
   total_conc_num <- length(unique(df$conc_num))
   total_conc_num <- length(unique(df$conc_num))
 
 
+  if (type == "reference") {
+    bg_group_vars <- c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
+    group_vars <- c("OrfRep", "Gene", "num", "Drug")
+  } else if (type == "deletion") {
+    bg_group_vars <- c("Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
+    group_vars <- c("OrfRep", "Gene", "Drug")
+  }
+
   # Calculate WT statistics from df_bg
   # Calculate WT statistics from df_bg
   wt_stats <- df_bg %>%
   wt_stats <- df_bg %>%
-    group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>%
+    group_by(across(all_of(bg_group_vars))) %>%
     summarise(
     summarise(
       WT_L = mean(mean_L, na.rm = TRUE),
       WT_L = mean(mean_L, na.rm = TRUE),
       WT_sd_L = mean(sd_L, na.rm = TRUE),
       WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -224,6 +232,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
       .groups = "drop"
     )
     )
 
 
+  # Join WT statistics to df
+  df <- df %>%
+    left_join(wt_stats, by = bg_group_vars)
+
   # Compute mean values at zero concentration
   # Compute mean values at zero concentration
   mean_zeroes <- df %>%
   mean_zeroes <- df %>%
     filter(conc_num == 0) %>%
     filter(conc_num == 0) %>%
@@ -236,9 +248,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       .groups = "drop"
       .groups = "drop"
     )
     )
 
 
-  # Join WT statistics to df
   df <- df %>%
   df <- df %>%
-    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
     left_join(mean_zeroes, by = c(group_vars))
     left_join(mean_zeroes, by = c(group_vars))
     
     
   # Calculate Raw Shifts and Z Shifts
   # Calculate Raw Shifts and Z Shifts
@@ -702,10 +712,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       total_spots <- grid_layout$nrow * grid_layout$ncol
       total_spots <- grid_layout$nrow * grid_layout$ncol
       num_plots <- length(static_plots)
       num_plots <- length(static_plots)
 
 
-      if (num_plots < total_spots) {
-        message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
-        static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
-      }
+      # if (num_plots < total_spots) {
+      #   message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
+      #   static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
+      # }
+
       grid.arrange(
       grid.arrange(
         grobs = static_plots,
         grobs = static_plots,
         ncol = grid_layout$ncol,
         ncol = grid_layout$ncol,
@@ -775,7 +786,6 @@ generate_scatter_plot <- function(plot, config) {
         linewidth = ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
         linewidth = ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
       )
       )
   }
   }
-
   
   
   # Add SD Bands if specified
   # Add SD Bands if specified
   if (!is.null(config$sd_band)) {
   if (!is.null(config$sd_band)) {
@@ -1538,25 +1548,22 @@ main <- function() {
           .groups = "drop"
           .groups = "drop"
         )
         )
 
 
-      message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
-      df_reference_interaction_stats <- calculate_summary_stats(
-        df = df_reference,
-        variables = c("L", "K", "r", "AUC"),
-        group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
-        )$df_with_stats
+      # message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
+      # df_reference_interaction_stats <- calculate_summary_stats(
+      #   df = df_reference,
+      #   variables = c("L", "K", "r", "AUC"),
+      #   group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
+      #   )$df_with_stats
       
       
-      message("Calculating reference strain interaction scores")
-      reference_results <- calculate_interaction_scores(df_reference_interaction_stats,
-        df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
-      df_reference_calculations <- reference_results$calculations
-      df_reference_interactions <- reference_results$interactions
-      df_reference_interactions_joined <- reference_results$full_data
-      write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
-
-      message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
-      generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
+      # # message("Calculating reference strain interaction scores")
+      # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
+      # df_reference_interactions_joined <- reference_results$full_data
+      # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      # write.csv(reference_results$interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+
+      # # message("Generating reference interaction plots")
+      # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
+      # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
 
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2
       df_deletion <- df_na_stats %>% # formerly X2
@@ -1578,13 +1585,14 @@ main <- function() {
         )$df_with_stats
         )$df_with_stats
 
 
       message("Calculating deletion strain(s) interactions scores")
       message("Calculating deletion strain(s) interactions scores")
-      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
-      df_calculations <- deletion_results$calculations
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
       df_interactions <- deletion_results$interactions
       df_interactions <- deletion_results$interactions
       df_interactions_joined <- deletion_results$full_data
       df_interactions_joined <- deletion_results$full_data
-      write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
+      write.csv(deletion_results$calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
 
 
+      print(df_interactions_joined, n = 20, width = 100000)
+
       message("Generating deletion interaction plots")
       message("Generating deletion interaction plots")
       deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
       deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)