8 months ago · faa82e0af4
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -300,52 +300,45 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
 
				     ungroup() %>%  # Ungroup before group_modify
			
 
				     group_by(across(all_of(group_vars))) %>%
			
 
				     group_modify(~ {
			
 
				-      # Check if there are enough unique conc_num_factor levels to perform lm
			
 
				-      if (length(unique(.x$conc_num_factor)) > 1) {
			
 
				-        
			
 
				-        # Perform linear modeling
			
 
				-        lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
			
 
				-        lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
			
 
				-        lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
			
 
				-        lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
			
 
				-
			
 
				-        # If the model fails, set model-related values to NA
			
 
				-        .x %>%
			
 
				-          mutate(
			
 
				-            lm_intercept_L = ifelse(!is.null(lm_L), coef(lm_L)[1], NA),
			
 
				-            lm_slope_L = ifelse(!is.null(lm_L), coef(lm_L)[2], NA),
			
 
				-            R_Squared_L = ifelse(!is.null(lm_L), summary(lm_L)$r.squared, NA),
			
 
				-            lm_Score_L = ifelse(!is.null(lm_L), max_conc * coef(lm_L)[2] + coef(lm_L)[1], NA),
			
 
				-            
			
 
				-            lm_intercept_K = ifelse(!is.null(lm_K), coef(lm_K)[1], NA),
			
 
				-            lm_slope_K = ifelse(!is.null(lm_K), coef(lm_K)[2], NA),
			
 
				-            R_Squared_K = ifelse(!is.null(lm_K), summary(lm_K)$r.squared, NA),
			
 
				-            lm_Score_K = ifelse(!is.null(lm_K), max_conc * coef(lm_K)[2] + coef(lm_K)[1], NA),
			
 
				-            
			
 
				-            lm_intercept_r = ifelse(!is.null(lm_r), coef(lm_r)[1], NA),
			
 
				-            lm_slope_r = ifelse(!is.null(lm_r), coef(lm_r)[2], NA),
			
 
				-            R_Squared_r = ifelse(!is.null(lm_r), summary(lm_r)$r.squared, NA),
			
 
				-            lm_Score_r = ifelse(!is.null(lm_r), max_conc * coef(lm_r)[2] + coef(lm_r)[1], NA),
			
 
				-            
			
 
				-            lm_intercept_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[1], NA),
			
 
				-            lm_slope_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[2], NA),
			
 
				-            R_Squared_AUC = ifelse(!is.null(lm_AUC), summary(lm_AUC)$r.squared, NA),
			
 
				-            lm_Score_AUC = ifelse(!is.null(lm_AUC), max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], NA)
			
 
				-          )
			
 
				-      } else {
			
 
				-        # If not enough conc_num_factor levels, set lm-related values to NA
			
 
				-        .x %>%
			
 
				-          mutate(
			
 
				-            lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA,
			
 
				-            lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA,
			
 
				-            lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA,
			
 
				-            lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA
			
 
				-          )
			
 
				-      }
			
 
				+
			
 
				+      # Filter each column for valid data or else linear modeling will fail
			
 
				+      valid_data_L <- .x %>% filter(!is.na(Delta_L))
			
 
				+      valid_data_K <- .x %>% filter(!is.na(Delta_K))
			
 
				+      valid_data_r <- .x %>% filter(!is.na(Delta_r))
			
 
				+      valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
			
 
				+      
			
 
				+      # Perform linear modeling
			
 
				+      lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
			
 
				+      lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
			
 
				+      lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
			
 
				+      lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
			
 
				+
			
 
				+      # Extract coefficients for calculations and plotting
			
 
				+      .x %>%
			
 
				+        mutate(
			
 
				+          lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
			
 
				+          lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
			
 
				+          R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
			
 
				+          lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
			
 
				+
			
 
				+          lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
			
 
				+          lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
			
 
				+          R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
			
 
				+          lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
			
 
				+
			
 
				+          lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
			
 
				+          lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
			
 
				+          R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
			
 
				+          lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
			
 
				+
			
 
				+          lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
			
 
				+          lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
			
 
				+          R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
			
 
				+          lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
			
 
				+        )
			
 
				     }) %>%
			
 
				     ungroup()
			
 
				 
			
 
				-
			
 
				   # For interaction plot error bars
			
 
				   delta_means_sds <- calculations %>%
			
 
				     group_by(across(all_of(group_vars))) %>%
			
@@ -631,78 +624,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
 
				         }
			
 
				       }
			
 
				 
			
 
				-      # Add error bars if specified
			
 
				-      if (!is.null(config$error_bar) && config$error_bar) {
			
 
				-        # Check if custom columns are provided for y_mean and y_sd, or use the defaults
			
 
				-        y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
			
 
				-          config$error_bar_params$y_mean_col
			
 
				-        } else {
			
 
				-          paste0("mean_", config$y_var)
			
 
				-        }
			
 
				-
			
 
				-        y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
			
 
				-          config$error_bar_params$y_sd_col
			
 
				-        } else {
			
 
				-          paste0("sd_", config$y_var)
			
 
				-        }
			
 
				-
			
 
				-        # Use rlang to handle custom error bar calculations
			
 
				-        if (!is.null(config$error_bar_params$custom_error_bar)) {
			
 
				-          custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
			
 
				-          custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
			
 
				-
			
 
				-          plot <- plot + geom_errorbar(
			
 
				-            aes(
			
 
				-              ymin = !!custom_ymin_expr,
			
 
				-              ymax = !!custom_ymax_expr
			
 
				-            ),
			
 
				-            color = config$error_bar_params$color,
			
 
				-            linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
			
 
				-          )
			
 
				-        } else {
			
 
				-          # If no custom error bar formula, use the default or dynamic ones
			
 
				-          if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
			
 
				-            # Only use color_var if it's present in the dataframe
			
 
				-            plot <- plot + geom_errorbar(
			
 
				-              aes(
			
 
				-                ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
			
 
				-                ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
			
 
				-                color = .data[[config$color_var]]
			
 
				-              ),
			
 
				-              linewidth = 0.1
			
 
				-            )
			
 
				-          } else {
			
 
				-            # If color_var is missing, fall back to a default color or none
			
 
				-            plot <- plot + geom_errorbar(
			
 
				-              aes(
			
 
				-                ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
			
 
				-                ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
			
 
				-              ),
			
 
				-              color = config$error_bar_params$color, # use the provided color or default
			
 
				-              linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
			
 
				-            )
			
 
				-          }
			
 
				-        }
			
 
				-
			
 
				-        # Add the center point if the option is provided
			
 
				-        if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
			
 
				-          if (!is.null(config$error_bar_params$color)) {
			
 
				-            plot <- plot + geom_point(
			
 
				-              mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
			
 
				-              color = config$error_bar_params$color,
			
 
				-              shape = 16,
			
 
				-              inherit.aes = FALSE  # Prevent overriding global aesthetics
			
 
				-            )
			
 
				-          } else {
			
 
				-            plot <- plot + geom_point(
			
 
				-              mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
			
 
				-              shape = 16,
			
 
				-              inherit.aes = FALSE  # Prevent overriding global aesthetics
			
 
				-            )
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				       # Convert ggplot to plotly for interactive version
			
 
				       plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
			
 
				 
			
@@ -729,16 +650,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
 
				       total_spots <- grid_layout$nrow * grid_layout$ncol
			
 
				       num_plots <- length(static_plots)
			
 
				 
			
 
				-      # if (num_plots < total_spots) {
			
 
				-      #   message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
			
 
				-      #   static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
			
 
				-      # }
			
 
				+      if (num_plots < total_spots) {
			
 
				+        message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
			
 
				+        static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
			
 
				+      }
			
 
				 
			
 
				+      # Print a page of gridded plots
			
 
				       grid.arrange(
			
 
				         grobs = static_plots,
			
 
				         ncol = grid_layout$ncol,
			
 
				-        nrow = grid_layout$nrow
			
 
				-      )
			
 
				+        nrow = grid_layout$nrow)
			
 
				+        
			
 
				     } else {
			
 
				       # Print individual plots on separate pages if no grid layout
			
 
				       for (plot in static_plots) {
			
@@ -788,6 +710,78 @@ generate_scatter_plot <- function(plot, config) {
 
				       inherit.aes = FALSE
			
 
				     )
			
 
				   }
			
 
				+
			
 
				+  # Add error bars if specified
			
 
				+  if (!is.null(config$error_bar) && config$error_bar) {
			
 
				+    # Check if custom columns are provided for y_mean and y_sd, or use the defaults
			
 
				+    y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
			
 
				+      config$error_bar_params$y_mean_col
			
 
				+    } else {
			
 
				+      paste0("mean_", config$y_var)
			
 
				+    }
			
 
				+
			
 
				+    y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
			
 
				+      config$error_bar_params$y_sd_col
			
 
				+    } else {
			
 
				+      paste0("sd_", config$y_var)
			
 
				+    }
			
 
				+
			
 
				+    # Use rlang to handle custom error bar calculations
			
 
				+    if (!is.null(config$error_bar_params$custom_error_bar)) {
			
 
				+      custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
			
 
				+      custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
			
 
				+
			
 
				+      plot <- plot + geom_errorbar(
			
 
				+        aes(
			
 
				+          ymin = !!custom_ymin_expr,
			
 
				+          ymax = !!custom_ymax_expr
			
 
				+        ),
			
 
				+        color = config$error_bar_params$color,
			
 
				+        linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
			
 
				+      )
			
 
				+    } else {
			
 
				+      # If no custom error bar formula, use the default or dynamic ones
			
 
				+      if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
			
 
				+        # Only use color_var if it's present in the dataframe
			
 
				+        plot <- plot + geom_errorbar(
			
 
				+          aes(
			
 
				+            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
			
 
				+            ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
			
 
				+            color = .data[[config$color_var]]
			
 
				+          ),
			
 
				+          linewidth = 0.1
			
 
				+        )
			
 
				+      } else {
			
 
				+        # If color_var is missing, fall back to a default color or none
			
 
				+        plot <- plot + geom_errorbar(
			
 
				+          aes(
			
 
				+            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
			
 
				+            ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
			
 
				+          ),
			
 
				+          color = config$error_bar_params$color, # use the provided color or default
			
 
				+          linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
			
 
				+        )
			
 
				+      }
			
 
				+    }
			
 
				+
			
 
				+    # Add the center point if the option is provided
			
 
				+    if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
			
 
				+      if (!is.null(config$error_bar_params$color)) {
			
 
				+        plot <- plot + geom_point(
			
 
				+          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
			
 
				+          color = config$error_bar_params$color,
			
 
				+          shape = 16,
			
 
				+          inherit.aes = FALSE  # Prevent overriding global aesthetics
			
 
				+        )
			
 
				+      } else {
			
 
				+        plot <- plot + geom_point(
			
 
				+          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
			
 
				+          shape = 16,
			
 
				+          inherit.aes = FALSE  # Prevent overriding global aesthetics
			
 
				+        )
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				   
			
 
				   # Add linear regression line if specified
			
 
				   if (!is.null(config$lm_line)) {
			
@@ -1570,7 +1564,7 @@ main <- function() {
 
				         ) %>%
			
 
				         filter(!is.na(L))
			
 
				       
			
 
				-      message("Calculating background strain summary statistics")
			
 
				+      message("Calculating background summary statistics")
			
 
				       ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
			
 
				         group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
			
 
				       summary_stats_bg <- ss_bg$summary_stats
			
@@ -1621,16 +1615,16 @@ main <- function() {
 
				         group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
			
 
				         )$df_with_stats
			
 
				       
			
 
				-      message("Calculating reference strain interaction scores")
			
 
				-      reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
			
 
				-      df_reference_interactions_joined <- reference_results$full_data
			
 
				-      df_reference_interactions <- reference_results$interactions
			
 
				-      write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
			
 
				-      write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
			
 
				-
			
 
				-      message("Generating reference interaction plots")
			
 
				-      reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
			
 
				-      generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
			
 
				+      # message("Calculating reference strain interaction scores")
			
 
				+      # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
			
 
				+      # df_reference_interactions_joined <- reference_results$full_data
			
 
				+      # df_reference_interactions <- reference_results$interactions
			
 
				+      # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
			
 
				+      # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
			
 
				+
			
 
				+      # message("Generating reference interaction plots")
			
 
				+      # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
			
 
				+      # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
			
 
				 
			
 
				       message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
			
 
				       df_deletion <- df_na_stats %>% # formerly X2