浏览代码

Move error bars to generate_scattter_plots()

Bryan Roessler 6 月之前
父节点
当前提交
faa82e0af4
共有 1 个文件被更改,包括 126 次插入132 次删除
  1. 126 132
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 126 - 132
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -300,52 +300,45 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
     ungroup() %>%  # Ungroup before group_modify
     ungroup() %>%  # Ungroup before group_modify
     group_by(across(all_of(group_vars))) %>%
     group_by(across(all_of(group_vars))) %>%
     group_modify(~ {
     group_modify(~ {
-      # Check if there are enough unique conc_num_factor levels to perform lm
-      if (length(unique(.x$conc_num_factor)) > 1) {
-        
-        # Perform linear modeling
-        lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
-        lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
-        lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
-        lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
-
-        # If the model fails, set model-related values to NA
-        .x %>%
-          mutate(
-            lm_intercept_L = ifelse(!is.null(lm_L), coef(lm_L)[1], NA),
-            lm_slope_L = ifelse(!is.null(lm_L), coef(lm_L)[2], NA),
-            R_Squared_L = ifelse(!is.null(lm_L), summary(lm_L)$r.squared, NA),
-            lm_Score_L = ifelse(!is.null(lm_L), max_conc * coef(lm_L)[2] + coef(lm_L)[1], NA),
-            
-            lm_intercept_K = ifelse(!is.null(lm_K), coef(lm_K)[1], NA),
-            lm_slope_K = ifelse(!is.null(lm_K), coef(lm_K)[2], NA),
-            R_Squared_K = ifelse(!is.null(lm_K), summary(lm_K)$r.squared, NA),
-            lm_Score_K = ifelse(!is.null(lm_K), max_conc * coef(lm_K)[2] + coef(lm_K)[1], NA),
-            
-            lm_intercept_r = ifelse(!is.null(lm_r), coef(lm_r)[1], NA),
-            lm_slope_r = ifelse(!is.null(lm_r), coef(lm_r)[2], NA),
-            R_Squared_r = ifelse(!is.null(lm_r), summary(lm_r)$r.squared, NA),
-            lm_Score_r = ifelse(!is.null(lm_r), max_conc * coef(lm_r)[2] + coef(lm_r)[1], NA),
-            
-            lm_intercept_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[1], NA),
-            lm_slope_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[2], NA),
-            R_Squared_AUC = ifelse(!is.null(lm_AUC), summary(lm_AUC)$r.squared, NA),
-            lm_Score_AUC = ifelse(!is.null(lm_AUC), max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], NA)
-          )
-      } else {
-        # If not enough conc_num_factor levels, set lm-related values to NA
-        .x %>%
-          mutate(
-            lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA,
-            lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA,
-            lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA,
-            lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA
-          )
-      }
+
+      # Filter each column for valid data or else linear modeling will fail
+      valid_data_L <- .x %>% filter(!is.na(Delta_L))
+      valid_data_K <- .x %>% filter(!is.na(Delta_K))
+      valid_data_r <- .x %>% filter(!is.na(Delta_r))
+      valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
+      
+      # Perform linear modeling
+      lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
+      lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
+      lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
+      lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
+
+      # Extract coefficients for calculations and plotting
+      .x %>%
+        mutate(
+          lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
+          lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
+          R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
+          lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
+
+          lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
+          lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
+          R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
+          lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
+
+          lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
+          lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
+          R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
+          lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
+
+          lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
+          lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
+          R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
+          lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
+        )
     }) %>%
     }) %>%
     ungroup()
     ungroup()
 
 
-
   # For interaction plot error bars
   # For interaction plot error bars
   delta_means_sds <- calculations %>%
   delta_means_sds <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
     group_by(across(all_of(group_vars))) %>%
@@ -631,78 +624,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
         }
         }
       }
       }
 
 
-      # Add error bars if specified
-      if (!is.null(config$error_bar) && config$error_bar) {
-        # Check if custom columns are provided for y_mean and y_sd, or use the defaults
-        y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
-          config$error_bar_params$y_mean_col
-        } else {
-          paste0("mean_", config$y_var)
-        }
-
-        y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
-          config$error_bar_params$y_sd_col
-        } else {
-          paste0("sd_", config$y_var)
-        }
-
-        # Use rlang to handle custom error bar calculations
-        if (!is.null(config$error_bar_params$custom_error_bar)) {
-          custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
-          custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
-
-          plot <- plot + geom_errorbar(
-            aes(
-              ymin = !!custom_ymin_expr,
-              ymax = !!custom_ymax_expr
-            ),
-            color = config$error_bar_params$color,
-            linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
-          )
-        } else {
-          # If no custom error bar formula, use the default or dynamic ones
-          if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
-            # Only use color_var if it's present in the dataframe
-            plot <- plot + geom_errorbar(
-              aes(
-                ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
-                ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
-                color = .data[[config$color_var]]
-              ),
-              linewidth = 0.1
-            )
-          } else {
-            # If color_var is missing, fall back to a default color or none
-            plot <- plot + geom_errorbar(
-              aes(
-                ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
-                ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
-              ),
-              color = config$error_bar_params$color, # use the provided color or default
-              linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
-            )
-          }
-        }
-
-        # Add the center point if the option is provided
-        if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
-          if (!is.null(config$error_bar_params$color)) {
-            plot <- plot + geom_point(
-              mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
-              color = config$error_bar_params$color,
-              shape = 16,
-              inherit.aes = FALSE  # Prevent overriding global aesthetics
-            )
-          } else {
-            plot <- plot + geom_point(
-              mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
-              shape = 16,
-              inherit.aes = FALSE  # Prevent overriding global aesthetics
-            )
-          }
-        }
-      }
-
       # Convert ggplot to plotly for interactive version
       # Convert ggplot to plotly for interactive version
       plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
       plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
 
 
@@ -729,16 +650,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
       total_spots <- grid_layout$nrow * grid_layout$ncol
       total_spots <- grid_layout$nrow * grid_layout$ncol
       num_plots <- length(static_plots)
       num_plots <- length(static_plots)
 
 
-      # if (num_plots < total_spots) {
-      #   message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
-      #   static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
-      # }
+      if (num_plots < total_spots) {
+        message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
+        static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
+      }
 
 
+      # Print a page of gridded plots
       grid.arrange(
       grid.arrange(
         grobs = static_plots,
         grobs = static_plots,
         ncol = grid_layout$ncol,
         ncol = grid_layout$ncol,
-        nrow = grid_layout$nrow
-      )
+        nrow = grid_layout$nrow)
+        
     } else {
     } else {
       # Print individual plots on separate pages if no grid layout
       # Print individual plots on separate pages if no grid layout
       for (plot in static_plots) {
       for (plot in static_plots) {
@@ -788,6 +710,78 @@ generate_scatter_plot <- function(plot, config) {
       inherit.aes = FALSE
       inherit.aes = FALSE
     )
     )
   }
   }
+
+  # Add error bars if specified
+  if (!is.null(config$error_bar) && config$error_bar) {
+    # Check if custom columns are provided for y_mean and y_sd, or use the defaults
+    y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
+      config$error_bar_params$y_mean_col
+    } else {
+      paste0("mean_", config$y_var)
+    }
+
+    y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
+      config$error_bar_params$y_sd_col
+    } else {
+      paste0("sd_", config$y_var)
+    }
+
+    # Use rlang to handle custom error bar calculations
+    if (!is.null(config$error_bar_params$custom_error_bar)) {
+      custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
+      custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
+
+      plot <- plot + geom_errorbar(
+        aes(
+          ymin = !!custom_ymin_expr,
+          ymax = !!custom_ymax_expr
+        ),
+        color = config$error_bar_params$color,
+        linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
+      )
+    } else {
+      # If no custom error bar formula, use the default or dynamic ones
+      if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
+        # Only use color_var if it's present in the dataframe
+        plot <- plot + geom_errorbar(
+          aes(
+            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
+            ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
+            color = .data[[config$color_var]]
+          ),
+          linewidth = 0.1
+        )
+      } else {
+        # If color_var is missing, fall back to a default color or none
+        plot <- plot + geom_errorbar(
+          aes(
+            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
+            ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
+          ),
+          color = config$error_bar_params$color, # use the provided color or default
+          linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
+        )
+      }
+    }
+
+    # Add the center point if the option is provided
+    if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
+      if (!is.null(config$error_bar_params$color)) {
+        plot <- plot + geom_point(
+          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
+          color = config$error_bar_params$color,
+          shape = 16,
+          inherit.aes = FALSE  # Prevent overriding global aesthetics
+        )
+      } else {
+        plot <- plot + geom_point(
+          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
+          shape = 16,
+          inherit.aes = FALSE  # Prevent overriding global aesthetics
+        )
+      }
+    }
+  }
   
   
   # Add linear regression line if specified
   # Add linear regression line if specified
   if (!is.null(config$lm_line)) {
   if (!is.null(config$lm_line)) {
@@ -1570,7 +1564,7 @@ main <- function() {
         ) %>%
         ) %>%
         filter(!is.na(L))
         filter(!is.na(L))
       
       
-      message("Calculating background strain summary statistics")
+      message("Calculating background summary statistics")
       ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
       ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
         group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
         group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
       summary_stats_bg <- ss_bg$summary_stats
       summary_stats_bg <- ss_bg$summary_stats
@@ -1621,16 +1615,16 @@ main <- function() {
         group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
         group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
         )$df_with_stats
         )$df_with_stats
       
       
-      message("Calculating reference strain interaction scores")
-      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
-      df_reference_interactions_joined <- reference_results$full_data
-      df_reference_interactions <- reference_results$interactions
-      write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
-      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
-
-      message("Generating reference interaction plots")
-      reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
-      generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
+      # message("Calculating reference strain interaction scores")
+      # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
+      # df_reference_interactions_joined <- reference_results$full_data
+      # df_reference_interactions <- reference_results$interactions
+      # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
+      # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
+
+      # message("Generating reference interaction plots")
+      # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
+      # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
 
 
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
       df_deletion <- df_na_stats %>% # formerly X2
       df_deletion <- df_na_stats %>% # formerly X2