소스 검색

Loop the sd values once

Bryan Roessler 8개월 전
부모
커밋
94eb8db517
1개의 파일 변경, 65줄 추가, 97줄 삭제
  1. 65 97
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 65 - 97
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -382,8 +382,6 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
     # Use appropriate helper function based on plot type
     plot <- switch(config$plot_type,
       "scatter" = generate_scatter_plot(plot, config),
-      "rank" = generate_rank_plot(plot, config),
-      "correlation" = generate_correlation_plot(plot, config),
       "box" = generate_box_plot(plot, config),
       "density" = plot + geom_density(),
       "bar" = plot + geom_bar(),
@@ -412,7 +410,7 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
 }
 
 generate_scatter_plot <- function(plot, config, interactive = FALSE) {
-
+  
   # Add the interactive `text` aesthetic if `interactive` is TRUE
   if (interactive) {
     plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
@@ -439,6 +437,16 @@ generate_scatter_plot <- function(plot, config, interactive = FALSE) {
     }
   }
 
+  # Add SD bands (iterate over sd_band only here)
+  if (!is.null(config$sd_band)) {
+    for (i in config$sd_band) {
+      plot <- plot +
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
+        annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
+        geom_hline(yintercept = c(-i, i), color = "gray")
+    }
+  }
+
   # Add error bars if specified
   if (!is.null(config$error_bar) && config$error_bar) {
     y_mean_col <- paste0("mean_", config$y_var)
@@ -470,42 +478,6 @@ generate_scatter_plot <- function(plot, config, interactive = FALSE) {
   return(plot)
 }
 
-generate_rank_plot <- function(plot, config) {
-  plot <- plot + geom_point(size = config$size %||% 0.1, shape = config$shape %||% 3)
-  
-  if (!is.null(config$sd_band)) {
-    for (i in seq_len(config$sd_band)) {
-      plot <- plot +
-        annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
-        annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
-        geom_hline(yintercept = c(-i, i), color = "gray")
-    }
-  }
-
-  if (!is.null(config$enhancer_label)) {
-    plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label)
-  }
-
-  if (!is.null(config$suppressor_label)) {
-    plot <- plot + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
-  }
-
-  return(plot)
-}
-
-generate_correlation_plot <- function(plot, config) {
-  plot <- plot + geom_point(shape = config$shape %||% 3, color = "gray70") +
-    geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "tomato3") +
-    annotate("text", x = config$annotate_position$x, y = config$annotate_position$y, label = config$correlation_text)
-  
-  if (!is.null(config$rect)) {
-    plot <- plot + geom_rect(aes(xmin = config$rect$xmin, xmax = config$rect$xmax, ymin = config$rect$ymin, ymax = config$rect$ymax),
-      color = "grey20", size = 0.25, alpha = 0.1, fill = NA, inherit.aes = FALSE)
-  }
-
-  return(plot)
-}
-
 generate_box_plot <- function(plot, config) {
   plot <- plot + geom_boxplot()
   
@@ -524,6 +496,21 @@ generate_box_plot <- function(plot, config) {
   return(plot)
 }
 
+# Adjust missing values and calculate ranks
+adjust_missing_and_rank <- function(df, variables) {
+
+  # Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
+  df <- df %>%
+    mutate(across(all_of(variables), list(
+      Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
+      Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
+      Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
+      Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
+    ), .names = "{fn}_{col}"))
+
+  return(df)
+}
+
 generate_interaction_plot_configs <- function(df, variables) {
   configs <- list()
 
@@ -632,69 +619,50 @@ generate_interaction_plot_configs <- function(df, variables) {
   return(configs)
 }
 
-# Adjust missing values and calculate ranks
-adjust_missing_and_rank <- function(df, variables) {
-
-  # Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
-  df <- df %>%
-    mutate(across(all_of(variables), list(
-      Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
-      Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
-      Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
-      Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
-    ), .names = "{fn}_{col}"))
-
-  return(df)
-}
-
 generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
   configs <- list()
   
-  # Adjust titles for _lm plots if is_lm is TRUE
   plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
-  
-  # Annotated version (with text)
-  for (sd_band in c(1, 2, 3)) {
-    configs[[length(configs) + 1]] <- list(
-      df = df,
-      x_var = rank_var,
-      y_var = zscore_var,
-      plot_type = "rank",
-      title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
-      sd_band = sd_band,
-      enhancer_label = list(
-        x = nrow(df) / 2, y = 10,
-        label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
-      ),
-      suppressor_label = list(
-        x = nrow(df) / 2, y = -10,
-        label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
-      ),
-      shape = 3,
-      size = 0.1
-    )
-  }
-  
-  # Non-annotated version (_notext)
-  for (sd_band in c(1, 2, 3)) {
-    configs[[length(configs) + 1]] <- list(
-      df = df,
-      x_var = rank_var,
-      y_var = zscore_var,
-      plot_type = "rank",
-      title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
-      sd_band = sd_band,
-      enhancer_label = NULL,  # No annotations for _notext
-      suppressor_label = NULL,  # No annotations for _notext
-      shape = 3,
-      size = 0.1,
-      position = "jitter"
-    )
-  }
-  
+
+  # Single config with all sd bands
+  configs[[length(configs) + 1]] <- list(
+    df = df,
+    x_var = rank_var,
+    y_var = zscore_var,
+    plot_type = "scatter",
+    title = paste(plot_title_prefix, var, "Rank Plot"),
+    sd_band = c(1, 2, 3),  # Pass all sd bands at once
+    enhancer_label = list(
+      x = nrow(df) / 2, y = 10,
+      label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ]))  # Example for the first SD band
+    ),
+    suppressor_label = list(
+      x = nrow(df) / 2, y = -10,
+      label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ]))
+    ),
+    shape = 3,
+    size = 0.1
+  )
+
+  # Non-annotated version
+  configs[[length(configs) + 1]] <- list(
+    df = df,
+    x_var = rank_var,
+    y_var = zscore_var,
+    plot_type = "scatter",
+    title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
+    sd_band = c(1, 2, 3),
+    enhancer_label = NULL,
+    suppressor_label = NULL,
+    shape = 3,
+    size = 0.1,
+    position = "jitter"
+  )
+
   return(configs)
 }
 
+
 generate_correlation_plot_configs <- function(df, variables) {
   configs <- list()
 
@@ -707,7 +675,7 @@ generate_correlation_plot_configs <- function(df, variables) {
       df = df,
       x_var = avg_zscore_var,
       y_var = z_lm_var,
-      plot_type = "correlation",
+      plot_type = "scatter",
       title = paste("Avg Zscore vs lm", variable),
       color_var = "Overlap",
       correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2)),
@@ -1194,7 +1162,7 @@ main <- function() {
       )
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
         plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
-      
+          
       rank_lm_plot_configs <- c(
         generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
         generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)