浏览代码

Move y-limit filtering from generate_and_save_plots to generate_interaction_plot_configs

Bryan Roessler 7 月之前
父节点
当前提交
c6fec56630
共有 1 个文件被更改,包括 71 次插入74 次删除
  1. 71 74
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 71 - 74
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -525,36 +525,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       config <- group$plots[[i]]
       df <- config$df
 
-      # Filter and debug out-of-bounds data
-      if (!is.null(config$ylim_vals)) {
-        out_of_bounds <- df %>%
-          filter(
-            is.na(.data[[config$y_var]]) |
-            .data[[config$y_var]] < config$ylim_vals[1] |
-            .data[[config$y_var]] > config$ylim_vals[2]
-          )
-        if (nrow(out_of_bounds) > 0) {
-          message("Filtered ", nrow(out_of_bounds), " row(s) from '", config$title, "' because ", config$y_var,
-                  " is outside of y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]:")
-          # print(out_of_bounds %>% select(OrfRep, Gene, num, Drug, scan, Plate, Row, Col, conc_num, all_of(config$y_var)), width = 1000)
-        }
-        df <- df %>%
-          filter(
-            !is.na(.data[[config$y_var]]) &
-            .data[[config$y_var]] >= config$ylim_vals[1] &
-            .data[[config$y_var]] <= config$ylim_vals[2]
-          )
-      }
-
       # Filter NAs
       if (!is.null(config$filter_na) && config$filter_na) {
         df <- df %>%
           filter(!is.na(.data[[config$y_var]]))
       }
 
+      # TODO for now skip all NA plots NA data
+      # Eventually add to own or filter_na block so we can handle selectively
       if (nrow(df) == 0) {
-        message("No data available after filtering for plot ", config$title)
-        next  # Skip this plot if no data is available
+        message("Insufficient data for plot:", config$title)
+        next  # skip plot if insufficient data is available
       }
 
       aes_mapping <- if (config$plot_type == "bar") {
@@ -599,10 +580,10 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
       if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
 
-      plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
+      #plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
 
       static_plots[[i]] <- plot
-      plotly_plots[[i]] <- plotly_plot
+      #plotly_plots[[i]] <- plotly_plot
     }
 
     grid_layout <- group$grid_layout
@@ -624,16 +605,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
         static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
       }
 
-      tryCatch({
-        grid.arrange(
-          grobs = static_plots,
-          ncol = grid_layout$ncol,
-          nrow = grid_layout$nrow
-        )
-      }, error = function(e) {
-        message("Error in grid.arrange: ", e$message)
-        print(static_plots)
-      })
+      grid.arrange(
+        grobs = static_plots,
+        ncol = grid_layout$ncol,
+        nrow = grid_layout$nrow
+      )
       
     } else {
       for (plot in static_plots) {
@@ -644,15 +620,14 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
 
   dev.off()
 
-  out_html_file <- file.path(out_dir, paste0(filename, ".html"))
-  message("Saving combined HTML file: ", out_html_file)
-  htmltools::save_html(
-    htmltools::tagList(plotly_plots),
-    file = out_html_file
-  )
+  # out_html_file <- file.path(out_dir, paste0(filename, ".html"))
+  # message("Saving combined HTML file: ", out_html_file)
+  # htmltools::save_html(
+  #   htmltools::tagList(plotly_plots),
+  #   file = out_html_file
+  # )
 }
 
-
 generate_scatter_plot <- function(plot, config) {
 
   # Define the points
@@ -790,7 +765,7 @@ generate_scatter_plot <- function(plot, config) {
             )
           }
         } else {
-          message("Skipping linear modeling line due to y-values outside of limits.")
+          message("Skipping linear regression line due to y-values outside of limits")
         }
       } else {
         # If no y-limits are provided, proceed with the annotation
@@ -805,7 +780,7 @@ generate_scatter_plot <- function(plot, config) {
         )
       }
     } else {
-      message("Skipping linear modeling line due to missing or invalid values.")
+      message("Skipping linear regression line due to missing or invalid values")
     }
   }
 
@@ -984,7 +959,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
         )
         plot_config$position <- "jitter"
 
-        # Cannot figure out how to place these properly for discrete x-axis so let's be hacky
         annotations <- list(
           list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = "                NG =", size = 4),
           list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = "                DB =", size = 4),
@@ -1045,26 +1019,50 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
     for (var in names(delta_limits_map)) {
       y_limits <- delta_limits_map[[var]]
       y_span <- y_limits[2] - y_limits[1]
+      y_var_name <- paste0("Delta_", var)
 
-      WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
-      Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2)
-      Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2)
-      R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2)
+      # Anti-filter to select out-of-bounds rows
+      out_of_bounds <- group_data %>%
+        filter(is.na(.data[[y_var_name]]) |
+        .data[[y_var_name]] < y_limits[1] |
+        .data[[y_var_name]] > y_limits[2])
+      
+      if (nrow(out_of_bounds) > 0) {
+        message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]",
+          nrow(out_of_bounds), OrfRepTitle, y_var_name, y_limits[1], y_limits[2]
+        ))
+      }
 
-      NG_value <- first(group_data$NG, default = 0)
-      DB_value <- first(group_data$DB, default = 0)
-      SM_value <- first(group_data$SM, default = 0)
+      # Do the actual filtering
+      group_data_filtered <- group_data %>%
+        filter(!is.na(.data[[y_var_name]]) &
+          .data[[y_var_name]] >= y_limits[1] &
+          .data[[y_var_name]] <= y_limits[2])
 
+      if (nrow(group_data_filtered) == 0) {
+        message("Insufficient data for plot: ", OrfRepTitle, " ", var)
+        next  # skip plot if insufficient data is available
+      }
+      
+      WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0)
+      Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2)
+      Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2)
+      R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2)
+      
+      NG_value <- first(group_data_filtered$NG, default = 0)
+      DB_value <- first(group_data_filtered$DB, default = 0)
+      SM_value <- first(group_data_filtered$SM, default = 0)
+      
       lm_intercept_col <- paste0("lm_intercept_", var)
       lm_slope_col <- paste0("lm_slope_", var)
-      lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
-      lm_slope_value <- first(group_data[[lm_slope_col]], default = 0)
-
+      lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0)
+      lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0)
+      
       plot_config <- list(
-        df = group_data,
+        df = group_data_filtered,
         plot_type = "scatter",
         x_var = "conc_num_factor_factor",
-        y_var = paste0("Delta_", var),
+        y_var = y_var_name,
         x_label = paste0("[", unique(df_summary$Drug)[1], "]"),
         shape = 16,
         title = paste(OrfRepTitle, Gene, sep = "      "),
@@ -1087,27 +1085,26 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
           color = "gray70",
           linewidth = 0.5
         ),
-        x_breaks = unique(group_data$conc_num_factor_factor),
-        x_labels = as.character(unique(group_data$conc_num)),
+        x_breaks = unique(group_data_filtered$conc_num_factor_factor),
+        x_labels = as.character(unique(group_data_filtered$conc_num)),
         ylim_vals = y_limits,
-        # filter_na = TRUE,
         lm_line = list(
           intercept = lm_intercept_value,
           slope = lm_slope_value,
           color = "blue",
           linewidth = 0.8,
-          x_min = min(as.numeric(group_data$conc_num_factor_factor)),
-          x_max = max(as.numeric(group_data$conc_num_factor_factor))
+          x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)),
+          x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor))
         )
       )
       delta_plot_configs <- append(delta_plot_configs, list(plot_config))
     }
   }
-
+  
   # Group delta plots in chunks of 12 per page
   chunk_size <- 12
   delta_plot_chunks <- split(delta_plot_configs, ceiling(seq_along(delta_plot_configs) / chunk_size))
-
+  
   return(c(
     list(list(grid_layout = list(ncol = 2), plots = stats_plot_configs)),
     list(list(grid_layout = list(ncol = 2), plots = stats_boxplot_configs)),
@@ -1587,16 +1584,16 @@ main <- function() {
         group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
         )$df_with_stats
       
-      # message("Calculating reference strain interaction scores")
-      # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
-      # df_reference_interactions_joined <- reference_results$full_data
-      # df_reference_interactions <- reference_results$interactions
-      # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
-
-      # message("Generating reference interaction plots")
-      # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
-      # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
+      message("Calculating reference strain interaction scores")
+      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
+      df_reference_interactions_joined <- reference_results$full_data
+      df_reference_interactions <- reference_results$interactions
+      write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+
+      message("Generating reference interaction plots")
+      reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
+      generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2