Improve WT groupings

2024-10-05 06:00:25 -04:00
parent e6f5aab81f
commit f207e40efd
1 changed files with 15 additions and 34 deletions
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol

  # Calculate WT statistics from df_bg
  wt_stats <- df_bg %>%
-    filter(conc_num == 0) %>%
+    group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>%
    summarise(
      WT_L = mean(mean_L, na.rm = TRUE),
      WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -219,21 +219,13 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
      WT_r = mean(mean_r, na.rm = TRUE),
      WT_sd_r = mean(sd_r, na.rm = TRUE),
      WT_AUC = mean(mean_AUC, na.rm = TRUE),
-      WT_sd_AUC = mean(sd_AUC, na.rm = TRUE)
+      WT_sd_AUC = mean(sd_AUC, na.rm = TRUE),
+      .groups = "drop"
    )

-  # Add WT statistics to df
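+  # Join per-group, per-concentration WT stats back to df; NOTE(review): rows with no matching key in df_bg get NA WT_* values — confirm intended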
  df <- df %>%
-    mutate(
-      WT_L = wt_stats$WT_L,
-      WT_sd_L = wt_stats$WT_sd_L,
-      WT_K = wt_stats$WT_K,
-      WT_sd_K = wt_stats$WT_sd_K,
-      WT_r = wt_stats$WT_r,
-      WT_sd_r = wt_stats$WT_sd_r,
-      WT_AUC = wt_stats$WT_AUC,
-      WT_sd_AUC = wt_stats$WT_sd_AUC
-    )
+    left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor"))

  # Compute mean values at zero concentration
  mean_L_zero_df <- df %>%
@@ -804,16 +796,6 @@ generate_scatter_plot <- function(plot, config) {
          color = smooth_color
        )
    }
-    
-    # For now I want to try and hardcode it
-    # else {
-    #   plot <- plot +
-    #     geom_smooth(
-    #       method = "lm",
-    #       se = FALSE,
-    #       color = smooth_color
-    #     )
-    # }
  }
  
  # Add SD Bands if specified
@@ -1077,8 +1059,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type)
        title_size = rel(1.3),
        coord_cartesian = y_limits,
        annotations = list(
-          list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste("     ZShift =", round(Z_Shift_value, 2))),
-          list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("             lm ZScore =", round(Z_lm_value, 2))),
+          list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste("      ZShift =", round(Z_Shift_value, 2))),
+          list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("              lm ZScore =", round(Z_lm_value, 2))),
          # list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("             R-squared =", round(R_squared_value, 2))),
          list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)),
          list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)),
@@ -1582,11 +1564,10 @@ main <- function() {
        )$df_with_stats
      
      message("Calculating reference strain interaction scores")
-      results <- calculate_interaction_scores(df_reference_interaction_stats,
-        df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
-      df_reference_calculations <- results$calculations
-      df_reference_interactions <- results$interactions
-      df_reference_interactions_joined <- results$full_data
+      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
+      df_reference_calculations <- reference_results$calculations
+      df_reference_interactions <- reference_results$interactions
+      df_reference_interactions_joined <- reference_results$full_data
      write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)

@@ -1614,10 +1595,10 @@ main <- function() {
        )$df_with_stats

      message("Calculating deletion strain(s) interactions scores")
-      results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
-      df_calculations <- results$calculations
-      df_interactions <- results$interactions
-      df_interactions_joined <- results$full_data
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
+      df_calculations <- deletion_results$calculations
+      df_interactions <- deletion_results$interactions
+      df_interactions_joined <- deletion_results$full_data
      write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
      write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)