diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 462743a4..c6124858 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -155,9 +155,8 @@ update_gene_names <- function(df, sgd_gene_list) { # Calculate summary statistics for all variables calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { - df <- df %>% - mutate(across(all_of(variables), ~ ifelse(. == 0, NA, .))) + # Summarize the variables within the grouped data summary_stats <- df %>% group_by(across(all_of(group_vars))) %>% summarise( @@ -165,16 +164,18 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c across(all_of(variables), list( mean = ~mean(., na.rm = TRUE), median = ~median(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), + max = ~ ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), + min = ~ ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), sd = ~sd(., na.rm = TRUE), - se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1) + se = ~ ifelse(all(is.na(.)), NA, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)) ), .names = "{.fn}_{.col}") ) - + + # Prevent .x and .y suffix issues by renaming columns df_cleaned <- df %>% - select(-any_of(names(summary_stats))) - + select(-any_of(setdiff(names(summary_stats), group_vars))) # Avoid duplicate columns in the final join + + # Join the stats back to the original dataframe df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars) return(list(summary_stats = summary_stats, df_with_stats = df_with_stats)) @@ -336,45 +337,57 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { - `%||%` <- function(a, b) if (!is.null(a)) a else b - - plots <- lapply(plot_configs, function(config) { - df <- config$df - plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))) - - # Handle plot types like "rank", "correlation", and default scatter/box/density - if (config$plot_type == "rank") { - plot <- plot + geom_point(size = 0.1, shape = 3) - if (!is.null(config$sd_band)) { - for (i in seq_len(config$sd_band)) { - plot <- plot + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + - annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + - geom_hline(yintercept = c(-i, i), color = "gray") + # Helper function for plot type logic + apply_plot_type <- function(plot, config) { + switch(config$plot_type, + "rank" = { + plot <- plot + geom_point(size = 0.1, shape = 3) + if (!is.null(config$sd_band)) { + for (i in seq_len(config$sd_band)) { + plot <- plot + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + + geom_hline(yintercept = c(-i, i), color = "gray") + } } - } - if (!is.null(config$enhancer_label)) { - plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + - annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) - } - } else if (config$plot_type == "correlation") { - plot <- plot + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + - annotate("text", x = 0, y = 0, label = config$correlation_text) - } else { - plot <- plot + aes(y = !!sym(config$y_var)) + - if (config$plot_type == "box") geom_boxplot() else - if (config$plot_type == "density") geom_density() else - if (config$plot_type == "bar") geom_bar(stat = "identity") else geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE) - } + plot + }, + "correlation" = { + plot + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + + annotate("text", x = 0, y = 0, label = config$correlation_text) + }, + "box" = plot + geom_boxplot(), + "density" = plot + geom_density(), + "bar" = plot + geom_bar(stat = "identity"), + plot + geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE) # Default scatter plot + ) + } - # Add error bars for "delta_bg" or general cases - if (config$error_bar %||% FALSE) { + # Helper function for error bars + apply_error_bars <- function(plot, config) { + if (!is.null(config$error_bar) && config$error_bar) { y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + + ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) } + plot + } + + # Helper function for annotations + apply_annotations <- function(plot, config) { + if (!is.null(config$annotations)) { + plot <- plot + geom_text(aes(x = config$annotations$x, y = config$annotations$y, label = config$annotations$label)) + } + plot + } + + # Generate each plot + plots <- lapply(plot_configs, function(config) { + plot <- ggplot(config$df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))) + plot <- apply_plot_type(plot, config) + plot <- apply_error_bars(plot, config) # Apply y-limits if provided if (!is.null(config$ylim_vals)) { @@ -382,27 +395,25 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la } # Apply labels, titles, and legends - plot <- plot + ggtitle(config$title) + theme_publication(legend_position = config$legend_position %||% "bottom") + - if (!is.null(config$x_label)) xlab(config$x_label) else NULL + - if (!is.null(config$y_label)) ylab(config$y_label) else NULL + plot <- plot + ggtitle(config$title) + + theme_publication(legend_position = if (!is.null(config$legend_position)) config$legend_position else "bottom") + + xlab(config$x_label %||% "") + ylab(config$y_label %||% "") - # Add annotations if available - if (!is.null(config$annotations)) { - plot <- plot + geom_text(aes(x = config$annotations$x, y = config$annotations$y, label = config$annotations$label)) - } + plot <- apply_annotations(plot, config) return(plot) }) - # Save the plots + # Save plots to PDF pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) lapply(plots, print) dev.off() + # Generate Plotly versions for interactive HTML plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))) # Handle grid layout - combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05) + combined_plot <- subplot(plotly_plots, nrows = if (!is.null(grid_layout)) grid_layout$nrow else length(plots), margin = 0.05) saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) } @@ -727,7 +738,7 @@ main <- function() { } l_outside_2sd_k_plots <- list( - list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter", + list(df = df_na_l_outside_2sd_k_stats, x_var = "l", y_var = "K", plot_type = "scatter", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", legend_position = "right" @@ -735,7 +746,7 @@ main <- function() { ) delta_bg_outside_2sd_k_plots <- list( - list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter", + list(df = df_na_l_outside_2sd_k_stats, x_var = "delta_bg", y_var = "K", plot_type = "scatter", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num", legend_position = "right"