Преглед изворни кода

Filter non-finite data for plate analysis plots

Bryan Roessler пре 7 месеци
родитељ
комит
cbe363e8ad
1 измењених фајлова са 26 додато и 15 уклоњено
  1. 26 15
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 26 - 15
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -634,6 +634,16 @@ generate_plate_analysis_plot_configs <- function(variables, stages = c("before",
   for (var in variables) {
     for (stage in stages) {
       df_plot <- if (stage == "before") df_before else df_after
+
+      # Keep only rows where the y-variable is finite (drops NA, NaN, and +/-Inf)
+      df_plot_filtered <- df_plot %>%
+        filter(is.finite(!!sym(var)))
+      
+      # Count removed rows
+      removed_rows <- nrow(df_plot) - nrow(df_plot_filtered)
+      if (removed_rows > 0) {
+        message(sprintf("Removed %d non-finite values for variable %s during stage %s", removed_rows, var, stage))
+      }
       
       # Adjust settings based on plot_type
       if (plot_type == "scatter") {
@@ -660,7 +670,7 @@ generate_plate_analysis_plot_configs <- function(variables, stages = c("before",
   return(plots)
 }
 
-generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) {
+generate_interaction_plot_configs <- function(df, limits_map = NULL) {
   # Default limits_map if not provided
   if (is.null(limits_map)) {
     limits_map <- list(
@@ -682,8 +692,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL)
 
   configs <- list()
 
-  for (variable in variables) {
-    y_range <- limits_map[[variable]]
+  for (var in names(limits_map)) {
+    y_range <- limits_map[[var]]
     
     # Calculate annotation positions
     y_min <- min(y_range)
@@ -699,8 +709,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL)
 
     # Prepare linear model line
     lm_line <- list(
-      intercept = df_filtered[[paste0("lm_intercept_", variable)]],
-      slope = df_filtered[[paste0("lm_slope_", variable)]]
+      intercept = df_filtered[[paste0("lm_intercept_", var)]],
+      slope = df_filtered[[paste0("lm_slope_", var)]]
     )
 
     # Calculate x-axis position for annotations
@@ -710,8 +720,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL)
     # Generate annotations
     annotations <- lapply(names(annotation_positions), function(annotation_name) {
       label <- switch(annotation_name,
-        ZShift = paste("ZShift =", round(df_filtered[[paste0("Z_Shift_", variable)]], 2)),
-        lm_ZScore = paste("lm ZScore =", round(df_filtered[[paste0("Z_lm_", variable)]], 2)),
+        ZShift = paste("ZShift =", round(df_filtered[[paste0("Z_Shift_", var)]], 2)),
+        lm_ZScore = paste("lm ZScore =", round(df_filtered[[paste0("Z_lm_", var)]], 2)),
         NG = paste("NG =", df_filtered$NG),
         DB = paste("DB =", df_filtered$DB),
         SM = paste("SM =", df_filtered$SM),
@@ -729,7 +739,7 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL)
     plot_settings <- list(
       df = df_filtered,
       x_var = "conc_num_factor",
-      y_var = variable,
+      y_var = var,
       ylim_vals = y_range,
       annotations = annotations,
       lm_line = lm_line,
@@ -1023,7 +1033,6 @@ main <- function() {
       df = df,
       variables = summary_vars,
       group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
-    message("Filtering non-finite data")
 
     message("Calculating summary statistics after quality control")
     ss <- calculate_summary_stats(
@@ -1033,6 +1042,8 @@ main <- function() {
     df_na_ss <- ss$summary_stats
     df_na_stats <- ss$df_with_stats
     write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
+    # Finite-only copy used for plotting (ggplot warns when it has to drop non-finite values)
+    df_na_stats_filtered <- df_na_stats %>% filter(across(all_of(summary_vars), is.finite))
 
     df_na_stats <- df_na_stats %>%
       mutate(
@@ -1153,20 +1164,20 @@ main <- function() {
     plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
       df_before = df_stats,
-      df_after = df_na_stats,
+      df_after = df_na_stats_filtered
     )
 
     plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
       df_before = df_stats,
-      df_after = df_na_stats,
+      df_after = df_na_stats_filtered,
       plot_type = "box"
     )
 
     plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs(
       variables = summary_vars,
       stages = c("after"),  # Only after QC
-      df_after = df_no_zeros_stats,
+      df_after = df_no_zeros_stats
     )
 
     plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs(
@@ -1208,7 +1219,7 @@ main <- function() {
 
     # TODO trying out some parallelization
     # future::plan(future::multicore, workers = parallel::detectCores())
-    future::plan(future::multisession, workers = 3)
+    future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
 
     plot_configs <- list(
       list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
@@ -1318,11 +1329,11 @@ main <- function() {
 
       # Create interaction plots
       message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, interaction_vars)
+      reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined)
       generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
 
       message("Generating deletion interaction plots")
-      deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, interaction_vars)
+      deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined)
       generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
 
       # Define conditions for enhancers and suppressors