From faa82e0af487618b7200c34f05ccd7819a86c399 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 6 Oct 2024 14:58:54 -0400 Subject: [PATCH] Move error bars to generate_scattter_plots() --- .../apps/r/calculate_interaction_zscores.R | 254 +++++++++--------- 1 file changed, 124 insertions(+), 130 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 56bc5b40..44e29b29 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -300,52 +300,45 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) ungroup() %>% # Ungroup before group_modify group_by(across(all_of(group_vars))) %>% group_modify(~ { - # Check if there are enough unique conc_num_factor levels to perform lm - if (length(unique(.x$conc_num_factor)) > 1) { - - # Perform linear modeling - lm_L <- lm(Delta_L ~ conc_num_factor, data = .x) - lm_K <- lm(Delta_K ~ conc_num_factor, data = .x) - lm_r <- lm(Delta_r ~ conc_num_factor, data = .x) - lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x) - # If the model fails, set model-related values to NA - .x %>% - mutate( - lm_intercept_L = ifelse(!is.null(lm_L), coef(lm_L)[1], NA), - lm_slope_L = ifelse(!is.null(lm_L), coef(lm_L)[2], NA), - R_Squared_L = ifelse(!is.null(lm_L), summary(lm_L)$r.squared, NA), - lm_Score_L = ifelse(!is.null(lm_L), max_conc * coef(lm_L)[2] + coef(lm_L)[1], NA), - - lm_intercept_K = ifelse(!is.null(lm_K), coef(lm_K)[1], NA), - lm_slope_K = ifelse(!is.null(lm_K), coef(lm_K)[2], NA), - R_Squared_K = ifelse(!is.null(lm_K), summary(lm_K)$r.squared, NA), - lm_Score_K = ifelse(!is.null(lm_K), max_conc * coef(lm_K)[2] + coef(lm_K)[1], NA), - - lm_intercept_r = ifelse(!is.null(lm_r), coef(lm_r)[1], NA), - lm_slope_r = ifelse(!is.null(lm_r), coef(lm_r)[2], NA), - R_Squared_r = ifelse(!is.null(lm_r), summary(lm_r)$r.squared, NA), - lm_Score_r = ifelse(!is.null(lm_r), max_conc * coef(lm_r)[2] + coef(lm_r)[1], NA), - - lm_intercept_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[1], NA), - lm_slope_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[2], NA), - R_Squared_AUC = ifelse(!is.null(lm_AUC), summary(lm_AUC)$r.squared, NA), - lm_Score_AUC = ifelse(!is.null(lm_AUC), max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], NA) - ) - } else { - # If not enough conc_num_factor levels, set lm-related values to NA - .x %>% - mutate( - lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA, - lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA, - lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA, - lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA - ) - } + # Filter each column for valid data or else linear modeling will fail + valid_data_L <- .x %>% filter(!is.na(Delta_L)) + valid_data_K <- .x %>% filter(!is.na(Delta_K)) + valid_data_r <- .x %>% filter(!is.na(Delta_r)) + valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC)) + + # Perform linear modeling + lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL + lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL + lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL + lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL + + # Extract coefficients for calculations and plotting + .x %>% + mutate( + lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA, + lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA, + R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA, + lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA, + + lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA, + lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA, + R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA, + lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA, + + lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA, + lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA, + R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA, + lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA, + + lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA, + lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA, + R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA, + lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA + ) }) %>% ungroup() - # For interaction plot error bars delta_means_sds <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -631,78 +624,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width } } - # Add error bars if specified - if (!is.null(config$error_bar) && config$error_bar) { - # Check if custom columns are provided for y_mean and y_sd, or use the defaults - y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) { - config$error_bar_params$y_mean_col - } else { - paste0("mean_", config$y_var) - } - - y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) { - config$error_bar_params$y_sd_col - } else { - paste0("sd_", config$y_var) - } - - # Use rlang to handle custom error bar calculations - if (!is.null(config$error_bar_params$custom_error_bar)) { - custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin) - custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax) - - plot <- plot + geom_errorbar( - aes( - ymin = !!custom_ymin_expr, - ymax = !!custom_ymax_expr - ), - color = config$error_bar_params$color, - linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) - ) - } else { - # If no custom error bar formula, use the default or dynamic ones - if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) { - # Only use color_var if it's present in the dataframe - plot <- plot + geom_errorbar( - aes( - ymin = .data[[y_mean_col]] - .data[[y_sd_col]], - ymax = .data[[y_mean_col]] + .data[[y_sd_col]], - color = .data[[config$color_var]] - ), - linewidth = 0.1 - ) - } else { - # If color_var is missing, fall back to a default color or none - plot <- plot + geom_errorbar( - aes( - ymin = .data[[y_mean_col]] - .data[[y_sd_col]], - ymax = .data[[y_mean_col]] + .data[[y_sd_col]] - ), - color = config$error_bar_params$color, # use the provided color or default - linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) - ) - } - } - - # Add the center point if the option is provided - if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) { - if (!is.null(config$error_bar_params$color)) { - plot <- plot + geom_point( - mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings - color = config$error_bar_params$color, - shape = 16, - inherit.aes = FALSE # Prevent overriding global aesthetics - ) - } else { - plot <- plot + geom_point( - mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings - shape = 16, - inherit.aes = FALSE # Prevent overriding global aesthetics - ) - } - } - } - # Convert ggplot to plotly for interactive version plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) @@ -729,16 +650,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width total_spots <- grid_layout$nrow * grid_layout$ncol num_plots <- length(static_plots) - # if (num_plots < total_spots) { - # message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") - # static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) - # } + if (num_plots < total_spots) { + message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") + static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) + } + # Print a page of gridded plots grid.arrange( grobs = static_plots, ncol = grid_layout$ncol, - nrow = grid_layout$nrow - ) + nrow = grid_layout$nrow) + } else { # Print individual plots on separate pages if no grid layout for (plot in static_plots) { @@ -788,6 +710,78 @@ generate_scatter_plot <- function(plot, config) { inherit.aes = FALSE ) } + + # Add error bars if specified + if (!is.null(config$error_bar) && config$error_bar) { + # Check if custom columns are provided for y_mean and y_sd, or use the defaults + y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) { + config$error_bar_params$y_mean_col + } else { + paste0("mean_", config$y_var) + } + + y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) { + config$error_bar_params$y_sd_col + } else { + paste0("sd_", config$y_var) + } + + # Use rlang to handle custom error bar calculations + if (!is.null(config$error_bar_params$custom_error_bar)) { + custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin) + custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax) + + plot <- plot + geom_errorbar( + aes( + ymin = !!custom_ymin_expr, + ymax = !!custom_ymax_expr + ), + color = config$error_bar_params$color, + linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) + ) + } else { + # If no custom error bar formula, use the default or dynamic ones + if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) { + # Only use color_var if it's present in the dataframe + plot <- plot + geom_errorbar( + aes( + ymin = .data[[y_mean_col]] - .data[[y_sd_col]], + ymax = .data[[y_mean_col]] + .data[[y_sd_col]], + color = .data[[config$color_var]] + ), + linewidth = 0.1 + ) + } else { + # If color_var is missing, fall back to a default color or none + plot <- plot + geom_errorbar( + aes( + ymin = .data[[y_mean_col]] - .data[[y_sd_col]], + ymax = .data[[y_mean_col]] + .data[[y_sd_col]] + ), + color = config$error_bar_params$color, # use the provided color or default + linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) + ) + } + } + + # Add the center point if the option is provided + if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) { + if (!is.null(config$error_bar_params$color)) { + plot <- plot + geom_point( + mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings + color = config$error_bar_params$color, + shape = 16, + inherit.aes = FALSE # Prevent overriding global aesthetics + ) + } else { + plot <- plot + geom_point( + mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings + shape = 16, + inherit.aes = FALSE # Prevent overriding global aesthetics + ) + } + } + } # Add linear regression line if specified if (!is.null(config$lm_line)) { @@ -1570,7 +1564,7 @@ main <- function() { ) %>% filter(!is.na(L)) - message("Calculating background strain summary statistics") + message("Calculating background summary statistics") ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")) summary_stats_bg <- ss_bg$summary_stats @@ -1621,16 +1615,16 @@ main <- function() { group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") )$df_with_stats - message("Calculating reference strain interaction scores") - reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") - df_reference_interactions_joined <- reference_results$full_data - df_reference_interactions <- reference_results$interactions - write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) - write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) + # message("Calculating reference strain interaction scores") + # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") + # df_reference_interactions_joined <- reference_results$full_data + # df_reference_interactions <- reference_results$interactions + # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) + # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") - generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) + # message("Generating reference interaction plots") + # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") + # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) message("Setting missing deletion values to the highest theoretical value at each drug conc for L") df_deletion <- df_na_stats %>% # formerly X2