소스 검색

Begin debugging

Bryan Roessler 7 달 전
부모
커밋
97de737ed5
1개의 변경된 파일58개의 추가작업 그리고 109개의 파일을 삭제
  1. 58 109
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 58 - 109
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -337,18 +337,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
   
   `%||%` <- function(a, b) if (!is.null(a)) a else b
-  
-  # Generalized plot generation
+
   plots <- lapply(plot_configs, function(config) {
-    
     df <- config$df
     plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))))
-    
-    # Rank plots with SD annotations
+
+    # Handle plot types like "rank", "correlation", and default scatter/box/density
     if (config$plot_type == "rank") {
       plot <- plot + geom_point(size = 0.1, shape = 3)
-      
-      # Adding SD bands
       if (!is.null(config$sd_band)) {
         for (i in seq_len(config$sd_band)) {
           plot <- plot +
@@ -357,111 +353,61 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
             geom_hline(yintercept = c(-i, i), color = "gray")
         }
       }
-
-      # Optionally add enhancer/suppressor count annotations
       if (!is.null(config$enhancer_label)) {
-        plot <- plot + annotate("text", x = config$enhancer_label$x,
-          y = config$enhancer_label$y, label = config$enhancer_label$label) +
-          annotate("text", x = config$suppressor_label$x,
-            y = config$suppressor_label$y, label = config$suppressor_label$label)
+        plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) +
+          annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
       }
-    }
-    
-    # Correlation plots
-    if (config$plot_type == "correlation") {
-      plot <- plot + geom_point(shape = 3, color = "gray70") +
-        geom_smooth(method = "lm", color = "tomato3") +
+    } else if (config$plot_type == "correlation") {
+      plot <- plot + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") +
         annotate("text", x = 0, y = 0, label = config$correlation_text)
+    } else {
+      plot <- plot + aes(y = !!sym(config$y_var)) +
+        if (config$plot_type == "box") geom_boxplot() else
+        if (config$plot_type == "density") geom_density() else
+        if (config$plot_type == "bar") geom_bar(stat = "identity") else geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE)
     }
 
-    # General scatter/boxplot/density handling
-    if (!is.null(config$y_var)) {
-      plot <- plot + aes(y = !!sym(config$y_var))
-      
+    # Add error bars for "delta_bg" or general cases
+    if (config$error_bar %||% FALSE) {
       y_mean_col <- paste0("mean_", config$y_var)
       y_sd_col <- paste0("sd_", config$y_var)
-      
-      if (config$y_var == "delta_bg" && config$plot_type == "scatter") {
-        plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
-          geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
-            ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
-          geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
-      } else if (config$error_bar %||% FALSE) {
-        plot <- plot +
-          geom_point(shape = 3, size = 0.2) +
-          geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
-            ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
-          geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
-      }
+      plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
+        ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
+        geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
     }
 
-    # Plot type selection
-    plot <- switch(config$plot_type,
-      "box" = plot + geom_boxplot(),
-      "density" = plot + geom_density(),
-      "bar" = plot + geom_bar(stat = "identity"),
-      plot + geom_point() + geom_smooth(method = "lm", se = FALSE))
-    
-    # Adding y-limits if provided
+    # Apply y-limits if provided
     if (!is.null(config$ylim_vals)) {
       plot <- plot + coord_cartesian(ylim = config$ylim_vals)
     }
 
-    # Setting legend position, titles, and labels
-    legend_position <- config$legend_position %||% "bottom"
-    plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position)
-    
-    if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
-    if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
-    
-    # Adding text annotations if provided
+    # Apply labels, titles, and legends
+    plot <- plot + ggtitle(config$title) + theme_publication(legend_position = config$legend_position %||% "bottom") +
+      if (!is.null(config$x_label)) xlab(config$x_label) else NULL +
+      if (!is.null(config$y_label)) ylab(config$y_label) else NULL
+
+    # Add annotations if available
     if (!is.null(config$annotations)) {
-      for (annotation in config$annotations) {
-        plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
-      }
+      plot <- plot + geom_text(aes(x = config$annotations$x, y = config$annotations$y, label = config$annotations$label))
     }
-    
+
     return(plot)
   })
-  
-  # If grid_layout is provided, arrange plots in a grid and save in a single PDF
-  if (!is.null(grid_layout)) {
-    pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
-    
-    # Loop through plots in chunks defined by ncol and nrow
-    for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) {
-      end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots))
-      plot_subset <- plots[start_idx:end_idx]
-      
-      # Arrange plots in a grid
-      do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow))
-    }
-    
-    dev.off()
-    
-    plotly_plots <- lapply(plots, function(plot) {
-      suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
-    })
-    combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05)
-    saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
-    
-  } else {
-    # Save individual plots to PDF
-    pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
-    lapply(plots, print)
-    dev.off()
-    
-    # Convert plots to plotly and save as HTML
-    plotly_plots <- lapply(plots, function(plot) {
-      suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
-    })
-    combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
-    saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
-  }
+
+  # Save the plots
+  pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
+  lapply(plots, print)
+  dev.off()
+
+  plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))))
+
+  # Handle grid layout
+  combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05)
+  saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
 }
 
 generate_interaction_plot_configs <- function(df, variables) {
-  plot_configs <- list()
+  configs <- list()
 
   for (variable in variables) {
     # Define the y-limits based on the variable being plotted
@@ -493,7 +439,7 @@ generate_interaction_plot_configs <- function(df, variables) {
     )
 
     # Add scatter plot configuration for this variable
-    plot_configs[[length(plot_configs) + 1]] <- list(
+    configs[[length(configs) + 1]] <- list(
       df = df,
       x_var = "conc_num_factor",
       y_var = delta_var,
@@ -511,7 +457,7 @@ generate_interaction_plot_configs <- function(df, variables) {
     )
 
     # Add box plot configuration for this variable
-    plot_configs[[length(plot_configs) + 1]] <- list(
+    configs[[length(configs) + 1]] <- list(
       df = df,
       x_var = "conc_num_factor",
       y_var = variable,
@@ -526,7 +472,7 @@ generate_interaction_plot_configs <- function(df, variables) {
     )
   }
 
-  return(plot_configs)
+  return(configs)
 }
 
 # Adjust missing values and calculate ranks
@@ -568,6 +514,8 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
         label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
       )
     )
+
+    return(configs)
   }
   
   # Non-annotated version (_notext)
@@ -588,14 +536,14 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
 }
 
 generate_correlation_plot_configs <- function(df, variables) {
-  plot_configs <- list()
+  configs <- list()
 
   for (variable in variables) {
     z_lm_var <- paste0("Z_lm_", variable)
     avg_zscore_var <- paste0("Avg_Zscore_", variable)
     lm_r_squared_col <- paste0("lm_R_squared_", variable)
 
-    plot_configs[[length(plot_configs) + 1]] <- list(
+    configs[[length(configs) + 1]] <- list(
       df = df,
       x_var = avg_zscore_var,
       y_var = z_lm_var,
@@ -607,7 +555,7 @@ generate_correlation_plot_configs <- function(df, variables) {
     )
   }
 
-  return(plot_configs)
+  return(configs)
 }
 
 main <- function() {
@@ -647,21 +595,22 @@ main <- function() {
     df_na_stats <- ss$df_with_stats
     write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
 
-    print("Summary stats:")
-    print(head(summary_stats), width = 200)
+    # print("Summary stats:")
+    # print(head(summary_stats), width = 200)
 
     # Remove rows with 0 values in L
     df_no_zeros <- df_na %>% filter(L > 0)
 
     # Additional filtering for non-finite values
+    # Filter and print non-finite rows, then filter and keep only finite rows
     df_na_filtered <- df_na %>%
-      filter(if_any(c(L), ~ !is.finite(.))) %>%
       {
-        if (nrow(.) > 0) {
+        non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.)))
+        if (nrow(non_finite_rows) > 0) {
           message("Removing non-finite rows:\n")
-          print(head(., n = 10))
+          print(head(non_finite_rows, n = 10))
         }
-        df_na %>% filter(if_all(c(L), is.finite))
+        filter(., if_all(c(L), is.finite))
       }
 
     # Filter data within and outside 2SD
@@ -871,11 +820,11 @@ main <- function() {
       # Calculate interactions
       variables <- c("L", "K", "r", "AUC")
       message("Calculating interaction scores")
-      print("Reference strain:")
-      print(head(reference_strain))
+      # print("Reference strain:")
+      # print(head(reference_strain))
       reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables)
-      print("Deletion strains:")
-      print(head(deletion_strains))
+      # print("Deletion strains:")
+      # print(head(deletion_strains))
       deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
       
       zscores_calculations_reference <- reference_results$calculations
@@ -952,7 +901,7 @@ main <- function() {
         generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
       )
       generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
-        plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
+        plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
       rank_lm_plot_config <- c(
         generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),