Jelajahi Sumber

Improve WT groupings

Bryan Roessler 7 bulan lalu
induk
komit
f207e40efd
1 file diubah dengan 15 tambahan dan 34 penghapusan
  1. 15 34
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 15 - 34
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
 
   # Calculate WT statistics from df_bg
   wt_stats <- df_bg %>%
-    filter(conc_num == 0) %>%
+    group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>%
     summarise(
       WT_L = mean(mean_L, na.rm = TRUE),
       WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -219,21 +219,13 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
       WT_r = mean(mean_r, na.rm = TRUE),
       WT_sd_r = mean(sd_r, na.rm = TRUE),
       WT_AUC = mean(mean_AUC, na.rm = TRUE),
-      WT_sd_AUC = mean(sd_AUC, na.rm = TRUE)
+      WT_sd_AUC = mean(sd_AUC, na.rm = TRUE),
+      .groups = "drop"
     )
 
-  # Add WT statistics to df
+  # Join WT stats back to df
   df <- df %>%
-    mutate(
-      WT_L = wt_stats$WT_L,
-      WT_sd_L = wt_stats$WT_sd_L,
-      WT_K = wt_stats$WT_K,
-      WT_sd_K = wt_stats$WT_sd_K,
-      WT_r = wt_stats$WT_r,
-      WT_sd_r = wt_stats$WT_sd_r,
-      WT_AUC = wt_stats$WT_AUC,
-      WT_sd_AUC = wt_stats$WT_sd_AUC
-    )
+    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor"))
 
   # Compute mean values at zero concentration
   mean_L_zero_df <- df %>%
@@ -804,16 +796,6 @@ generate_scatter_plot <- function(plot, config) {
           color = smooth_color
         )
     }
-    
-    # For now I want to try and hardcode it
-    # else {
-    #   plot <- plot +
-    #     geom_smooth(
-    #       method = "lm",
-    #       se = FALSE,
-    #       color = smooth_color
-    #     )
-    # }
   }
   
   # Add SD Bands if specified
@@ -1077,8 +1059,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type)
         title_size = rel(1.3),
         coord_cartesian = y_limits,
         annotations = list(
-          list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste("     ZShift =", round(Z_Shift_value, 2))),
-          list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("             lm ZScore =", round(Z_lm_value, 2))),
+          list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste("      ZShift =", round(Z_Shift_value, 2))),
+          list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("              lm ZScore =", round(Z_lm_value, 2))),
           # list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("             R-squared =", round(R_squared_value, 2))),
           list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)),
           list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)),
@@ -1582,11 +1564,10 @@ main <- function() {
         )$df_with_stats
       
       message("Calculating reference strain interaction scores")
-      results <- calculate_interaction_scores(df_reference_interaction_stats,
-        df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
-      df_reference_calculations <- results$calculations
-      df_reference_interactions <- results$interactions
-      df_reference_interactions_joined <- results$full_data
+      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
+      df_reference_calculations <- reference_results$calculations
+      df_reference_interactions <- reference_results$interactions
+      df_reference_interactions_joined <- reference_results$full_data
       write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
       write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
 
@@ -1614,10 +1595,10 @@ main <- function() {
         )$df_with_stats
 
       message("Calculating deletion strain(s) interactions scores")
-      results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
-      df_calculations <- results$calculations
-      df_interactions <- results$interactions
-      df_interactions_joined <- results$full_data
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
+      df_calculations <- deletion_results$calculations
+      df_interactions <- deletion_results$interactions
+      df_interactions_joined <- deletion_results$full_data
       write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
       write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)