浏览代码

Add missing calculation columns for Delta_L error bars

Bryan Roessler 6 月之前
父节点
当前提交
320338316c
共有 2 个文件被更改,包括 59 次插入36 次删除
  1. 58 36
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R
  2. 1 0
      qhtcp-workflow/qhtcp-workflow

+ 58 - 36
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -347,6 +347,25 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
     }) %>%
     ungroup()
 
+  # For interaction plot error bars
+  delta_means <- calculations %>%
+    group_by(across(all_of(group_vars))) %>%
+    summarise(
+      mean_Delta_L = mean(Delta_L, na.rm = TRUE),
+      mean_Delta_K = mean(Delta_K, na.rm = TRUE),
+      mean_Delta_r = mean(Delta_r, na.rm = TRUE),
+      mean_Delta_AUC = mean(Delta_AUC, na.rm = TRUE),
+      sd_Delta_L = sd(Delta_L, na.rm = TRUE),
+      sd_Delta_K = sd(Delta_K, na.rm = TRUE),
+      sd_Delta_r = sd(Delta_r, na.rm = TRUE),
+      sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE),
+
+      .groups = "drop"
+    )
+
+  calculations <- calculations %>%
+    left_join(delta_means, by = group_vars)
+
   # Summary statistics for lm scores
   lm_means_sds <- calculations %>%
     summarise(
@@ -462,6 +481,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
       Exp_L, Exp_K, Exp_r, Exp_AUC,
       Delta_L, Delta_K, Delta_r, Delta_AUC,
+      mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC,
       Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
     )
 
@@ -548,8 +568,8 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
         
         # Print rows being filtered out
         if (nrow(out_of_bounds_df) > 0) {
-          message("Filtered out rows outside y-limits:")
-          print(out_of_bounds_df)
+          message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df))
+          # print(out_of_bounds_df)
         }
 
         # Filter the valid data for plotting
@@ -646,11 +666,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
       }
 
       # Convert ggplot to plotly for interactive version
-      plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
+      # plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
 
       # Store both static and interactive versions
       static_plots[[i]] <- plot
-      plotly_plots[[i]] <- plotly_plot
+      # plotly_plots[[i]] <- plotly_plot
     }
 
     # Print the plots in the current group to the PDF
@@ -987,7 +1007,7 @@ generate_interaction_plot_configs <- function(df, type) {
         df = group_data,
         plot_type = "scatter",
         x_var = "conc_num_factor_factor",
-        y_var = var,
+        y_var = paste0("Delta_", var),
         x_label = unique(group_data$Drug)[1],
         title = paste(OrfRepTitle, Gene, num, sep = "      "),
         coord_cartesian = y_limits,
@@ -1019,10 +1039,11 @@ generate_interaction_plot_configs <- function(df, type) {
     }
   }
 
+  # Return plot configs
   return(list(
-    list(grid_layout = list(ncol = 2), plots = stats_plot_configs),  # nrow will be calculated dynamically
-    list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs),  # nrow will be calculated dynamically
-    list(grid_layout = list(ncol = 4), plots = delta_plot_configs)  # nrow will be calculated dynamically
+    list(grid_layout = list(ncol = 2), plots = stats_plot_configs),
+    list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs),
+    list(grid_layout = list(ncol = 4), plots = delta_plot_configs)  # nrow calculated dynamically
   ))
 }
 
@@ -1464,7 +1485,8 @@ main <- function() {
         variables = c("L", "K", "r", "AUC"),
         group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor")
         )$df_with_stats
-        message("Calculating reference strain interaction scores")
+      
+      message("Calculating reference strain interaction scores")
       results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num"))
       df_calculations_reference <- results$calculations
       df_interactions_reference <- results$interactions
@@ -1472,37 +1494,37 @@ main <- function() {
       write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
       write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
 
-      # message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
-      # df_deletion <- df_na_stats %>% # formerly X2
-      #   filter(OrfRep != strain) %>%
-      #   filter(!is.na(L)) %>%
-      #   group_by(OrfRep, Gene, conc_num) %>%
-      #   mutate(
-      #     max_l_theoretical = max(max_L, na.rm = TRUE),
-      #     L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
-      #     SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
-      #     L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
-      #   ungroup()
-
-      # message("Calculating deletion strain(s) summary statistics")
-      # df_deletion_stats <- calculate_summary_stats(
-      #   df = df_deletion,
-      #   variables = c("L", "K", "r", "AUC"),
-      #   group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
-      #   )$df_with_stats
-
-      # message("Calculating deletion strain(s) interactions scores")
-      # results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
-      # df_calculations <- results$calculations
-      # df_interactions <- results$interactions
-      # df_interactions_joined <- results$full_data
-      # write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
-      # write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
-
       message("Generating reference interaction plots")
       reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, "reference")
       generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
 
+      message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
+      df_deletion <- df_na_stats %>% # formerly X2
+        filter(OrfRep != strain) %>%
+        filter(!is.na(L)) %>%
+        group_by(OrfRep, Gene, conc_num) %>%
+        mutate(
+          max_l_theoretical = max(max_L, na.rm = TRUE),
+          L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
+          SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
+          L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
+        ungroup()
+
+      message("Calculating deletion strain(s) summary statistics")
+      df_deletion_stats <- calculate_summary_stats(
+        df = df_deletion,
+        variables = c("L", "K", "r", "AUC"),
+        group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
+        )$df_with_stats
+
+      message("Calculating deletion strain(s) interactions scores")
+      results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
+      df_calculations <- results$calculations
+      df_interactions <- results$interactions
+      df_interactions_joined <- results$full_data
+      write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
+      write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
+
       message("Generating deletion interaction plots")
       deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, "deletion")
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)

+ 1 - 0
qhtcp-workflow/qhtcp-workflow

@@ -1443,6 +1443,7 @@ wrapper calculate_interaction_zscores
 #    * Plate analysis error bars and some others will be slightly different
 #    * Can be changed back but better to have plots reflect data, no?
 # * Dynamically generate axis limits based on data (if desired)
+# * Parallelize interaction plotting
 #
 # INPUT
 #