Prechádzať zdrojové kódy

Explicitly filter data before plotting to silence ggplot

Bryan Roessler 8 mesiacov pred
rodič
commit
5dfa2d1650

+ 55 - 39
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -365,7 +365,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 }
 
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
-  message("Generating html and pdf plots for: ", file_name)
+  message("Generating ", file_name, ".pdf and ", file_name, ".html")
 
   # Prepare lists to collect plots
   static_plots <- list()
@@ -456,32 +456,6 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
 }
 
 generate_scatter_plot <- function(plot, config) {
-  # Check for missing or out-of-range data
-  missing_data <- config$df %>%
-    filter(
-      is.na(!!sym(config$x_var)) | is.na(!!sym(config$y_var)) |
-        !!sym(config$y_var) < min(config$ylim_vals, na.rm = TRUE) |
-        !!sym(config$y_var) > max(config$ylim_vals, na.rm = TRUE)
-    )
-
-  # Print the rows with missing or out-of-range data if any
-  if (nrow(missing_data) > 0) {
-    message("Missing or out-of-range data for ", config$title, ":")
-    print(
-      missing_data %>% select(any_of(
-        c(
-          "OrfRep",
-          "Gene",
-          "num",
-          "conc_num",
-          "conc_num_factor",
-          config$x_var,
-          config$y_var
-        )
-      )),
-      n = 100
-    )
-  }
 
   if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
     plot <- plot + geom_point(
@@ -620,6 +594,10 @@ adjust_missing_and_rank <- function(df, variables) {
 generate_interaction_plot_configs <- function(df, variables) {
   configs <- list()
   
+  # Data frames to collect filtered data and out-of-range data
+  filtered_data_list <- list()
+  out_of_range_data_list <- list()
+  
   # Define common y-limits for each variable
   limits_map <- list(
     L = c(-65, 65),
@@ -630,8 +608,8 @@ generate_interaction_plot_configs <- function(df, variables) {
   
   # Define annotation positions and labels
   annotation_positions <- list(
-    Z_Shift = 45,
-    Z_lm = 25,
+    ZShift = 45,
+    lm_ZScore = 25,
     NG = -25,
     DB = -35,
     SM = -45
@@ -639,11 +617,11 @@ generate_interaction_plot_configs <- function(df, variables) {
   
   # Define functions to generate annotation labels
   annotation_labels <- list(
-    Z_Shift = function(df, var) {
+    ZShift = function(df, var) {
       val <- df[[paste0("Z_Shift_", var)]]
       paste("ZShift =", round(val, 2))
     },
-    Z_lm = function(df, var) {
+    lm_ZScore = function(df, var) {
       val <- df[[paste0("Z_lm_", var)]]
       paste("lm ZScore =", round(val, 2))
     },
@@ -678,9 +656,30 @@ generate_interaction_plot_configs <- function(df, variables) {
     # Remove NULL annotations
     annotations <- Filter(Negate(is.null), annotations)
     
+    # Filter the data based on y-limits and missing values
+    y_var_sym <- sym(variable)
+    x_var_sym <- sym("conc_num_factor")
+    
+    # Identify missing data and out-of-range data
+    missing_data <- df %>% filter(is.na(!!x_var_sym) | is.na(!!y_var_sym))
+    out_of_range_data <- df %>% filter(
+      !is.na(!!y_var_sym) & 
+        (!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
+    )
+    
+    # Combine missing data and out-of-range data
+    data_to_filter_out <- bind_rows(missing_data, out_of_range_data) %>% distinct()
+    
+    # Filtered data for plotting
+    filtered_data <- df %>% anti_join(data_to_filter_out, by = names(df))
+    
+    # Collect the filtered data and out-of-range data
+    filtered_data_list[[variable]] <- filtered_data
+    out_of_range_data_list[[variable]] <- data_to_filter_out
+    
     # Create scatter plot config
     configs[[length(configs) + 1]] <- list(
-      df = df,
+      df = filtered_data,
       x_var = "conc_num_factor",
       y_var = variable,
       plot_type = "scatter",
@@ -698,7 +697,7 @@ generate_interaction_plot_configs <- function(df, variables) {
     
     # Create box plot config
     configs[[length(configs) + 1]] <- list(
-      df = df,
+      df = filtered_data,
       x_var = "conc_num_factor",
       y_var = variable,
       plot_type = "box",
@@ -713,10 +712,16 @@ generate_interaction_plot_configs <- function(df, variables) {
     )
   }
   
-  return(configs)
+  # Combine the filtered data and out-of-range data into data frames
+  filtered_data_df <- bind_rows(filtered_data_list, .id = "variable")
+  out_of_range_data_df <- bind_rows(out_of_range_data_list, .id = "variable")
+  
+  return(list(
+    configs = configs,
+    out_of_range_data = out_of_range_data_df
+  ))
 }
 
-
 generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
   configs <- list()
   
@@ -1151,11 +1156,22 @@ main <- function() {
       write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
 
       # Create interaction plots
-      message("Generating interaction plot configurations")
-      reference_plot_configs <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
-      deletion_plot_configs <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
-      message("Generating interaction plots")
+      message("Generating reference interaction plots")
+      results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
+      if (nrow(results$out_of_range_data) > 0) {
+        message("Out-of-range data:")
+        print(results$out_of_range_data)
+      }
+      reference_plot_configs <- results$configs
       generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
+
+      message("Generating deletion interaction plots")
+      results <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
+      if (nrow(results$out_of_range_data) > 0) {
+        message("Out-of-range data:")
+        print(results$out_of_range_data)
+      }
+      deletion_plot_configs <- results$configs
       generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
 
       # Define conditions for enhancers and suppressors