浏览代码

Improve interaction plot groupings

Bryan Roessler 6 月之前
父节点
当前提交
9769bae3d1
共有 1 个文件被更改,包括 44 次插入和 34 次删除
  1. 44 34
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 44 - 34
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -340,11 +340,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
       Z_Shift_L = first(Z_Shift_L),
       Z_Shift_K = first(Z_Shift_K),
       Z_Shift_r = first(Z_Shift_r),
-      Z_Shift_AUC = first(Z_Shift_AUC),
+      Z_Shift_AUC = first(Z_Shift_AUC)
       
-      NG = first(NG),
-      DB = first(DB),
-      SM = first(SM)
     ) %>%
     arrange(desc(Z_lm_L), desc(NG)) %>%
     ungroup() %>%
@@ -669,16 +666,14 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
 
 generate_interaction_plot_configs <- function(df, type) {
 
+  # Set group_vars based on the type (reference or deletion)
   if (type == "reference") {
     group_vars <- c("OrfRep", "Gene", "num")
-    df <- df %>%
-      mutate(OrfRepCombined = paste(!!!syms(group_vars), sep = "_"))
   } else if (type == "deletion") {
     group_vars <- c("OrfRep", "Gene")
-    df <- df %>%
-      mutate(OrfRepCombined = OrfRep)
   }
 
+  # Define the limits for the plots
   limits_map <- list(
     L = c(0, 130),
     K = c(-20, 160),
@@ -696,14 +691,18 @@ generate_interaction_plot_configs <- function(df, type) {
   overall_plot_configs <- list()
   delta_plot_configs <- list()
 
-  # Overall plots with lm_line for each interaction
+  # Overall statistics plots
   for (var in names(limits_map)) {
     y_limits <- limits_map[[var]]
-    
+
     # Use the pre-calculated lm intercept and slope from the dataframe
     lm_intercept_col <- paste0("lm_intercept_", var)
     lm_slope_col <- paste0("lm_slope_", var)
 
+    # Ensure no NA or invalid values in lm_line calculations
+    intercept_value <- mean(df[[lm_intercept_col]], na.rm = TRUE)
+    slope_value <- mean(df[[lm_slope_col]], na.rm = TRUE)
+
     plot_config <- list(
       df = df,
       plot_type = "scatter",
@@ -716,39 +715,47 @@ generate_interaction_plot_configs <- function(df, type) {
       x_breaks = unique(df$conc_num_factor_factor),
       x_labels = as.character(unique(df$conc_num)),
       position = "jitter",
-      smooth = TRUE,  
+      smooth = TRUE,
       lm_line = list(
-        intercept = mean(df[[lm_intercept_col]], na.rm = TRUE),
-        slope = mean(df[[lm_slope_col]], na.rm = TRUE)
+        intercept = intercept_value,
+        slope = slope_value
       )
     )
     overall_plot_configs <- append(overall_plot_configs, list(plot_config))
   }
 
-  # Delta plots (add lm_line if necessary)
-  unique_groups <- df %>% select(all_of(group_vars)) %>% distinct()
+  # Delta interaction plots
+  grouped_data <- df %>%
+    group_by(across(all_of(group_vars))) %>%
+    group_split()
 
-  for (i in seq_len(nrow(unique_groups))) {
-    group <- unique_groups[i, ]
-    group_data <- df %>% semi_join(group, by = group_vars)
+  for (group_data in grouped_data) {
+    OrfRep <- first(group_data$OrfRep)
+    Gene <- first(group_data$Gene)
+    num <- if ("num" %in% names(group_data)) first(group_data$num) else ""
 
-    OrfRep <- as.character(group$OrfRep)
-    Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
-    num <- if ("num" %in% names(group)) as.character(group$num) else ""
+    if (type == "reference") {
+        OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_")
+    } else if (type == "deletion") {
+        OrfRepTitle <- OrfRep
+    }
 
     for (var in names(delta_limits_map)) {
       y_limits <- delta_limits_map[[var]]
       y_span <- y_limits[2] - y_limits[1]
 
-      # For error bars
-      WT_sd_value <- group_data[[paste0("WT_sd_", var)]][1]
+      # Error bars
+      WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
+
+      # Z_Shift and lm values
+      Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2)
+      Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2)
+      R_squared_value <- round(first(group_data[[paste0("R_squared_", var)]], default = 0), 2)
 
-      Z_Shift_value <- round(group_data[[paste0("Z_Shift_", var)]][1], 2)
-      Z_lm_value <- round(group_data[[paste0("Z_lm_", var)]][1], 2)
-      R_squared_value <- round(group_data[[paste0("R_squared_", var)]][1], 2)
-      NG_value <- group_data$NG[1]
-      DB_value <- group_data$DB[1]
-      SM_value <- group_data$SM[1]
+      # NG, DB, SM values
+      NG_value <- first(group_data$NG, default = 0)
+      DB_value <- first(group_data$DB, default = 0)
+      SM_value <- first(group_data$SM, default = 0)
 
       annotations <- list(
         list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
@@ -759,14 +766,17 @@ generate_interaction_plot_configs <- function(df, type) {
         list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
       )
 
-      # Delta plot configuration with lm_line if needed
+      # lm_line for delta plots
+      lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
+      lm_slope_value <- first(group_data[[lm_slope_col]], default = 0)
+
       plot_config <- list(
         df = group_data,
         plot_type = "scatter",
         x_var = "conc_num_factor_factor",
         y_var = var,
         x_label = unique(group_data$Drug)[1],
-        title = paste(OrfRepCombined, Gene, sep = "      "),
+        title = paste(OrfRepTitle, Gene, num, sep = "      "),
         coord_cartesian = y_limits,
         annotations = annotations,
         error_bar = TRUE,
@@ -779,15 +789,15 @@ generate_interaction_plot_configs <- function(df, type) {
         x_labels = as.character(unique(group_data$conc_num)),
         ylim_vals = y_limits,
         lm_line = list(
-          intercept = group_data[[lm_intercept_col]][1],
-          slope = group_data[[lm_slope_col]][1]
+          intercept = lm_intercept_value,
+          slope = lm_slope_value
         )
       )
       delta_plot_configs <- append(delta_plot_configs, list(plot_config))
     }
   }
 
-  # Calculate dynamic grid layout based on the number of plots for the delta_L plots
+  # Calculate dynamic grid layout
   grid_ncol <- 4
   num_plots <- length(delta_plot_configs)
   grid_nrow <- ceiling(num_plots / grid_ncol)