diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index f17a77d5..462743a4 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -337,18 +337,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { `%||%` <- function(a, b) if (!is.null(a)) a else b - - # Generalized plot generation + plots <- lapply(plot_configs, function(config) { - df <- config$df plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))) - - # Rank plots with SD annotations + + # Handle plot types like "rank", "correlation", and default scatter/box/density if (config$plot_type == "rank") { plot <- plot + geom_point(size = 0.1, shape = 3) - - # Adding SD bands if (!is.null(config$sd_band)) { for (i in seq_len(config$sd_band)) { plot <- plot + @@ -357,111 +353,61 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la geom_hline(yintercept = c(-i, i), color = "gray") } } - - # Optionally add enhancer/suppressor count annotations if (!is.null(config$enhancer_label)) { - plot <- plot + annotate("text", x = config$enhancer_label$x, - y = config$enhancer_label$y, label = config$enhancer_label$label) + - annotate("text", x = config$suppressor_label$x, - y = config$suppressor_label$y, label = config$suppressor_label$label) + plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) + + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label) } - } - - # Correlation plots - if (config$plot_type == "correlation") { - plot <- plot + geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + + } else if (config$plot_type == "correlation") { + plot <- plot + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") + annotate("text", x = 0, y = 0, label = config$correlation_text) + } else { + plot <- plot + aes(y = !!sym(config$y_var)) + + if (config$plot_type == "box") geom_boxplot() else + if (config$plot_type == "density") geom_density() else + if (config$plot_type == "bar") geom_bar(stat = "identity") else geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE) } - # General scatter/boxplot/density handling - if (!is.null(config$y_var)) { - plot <- plot + aes(y = !!sym(config$y_var)) - + # Add error bars for "delta_bg" or general cases + if (config$error_bar %||% FALSE) { y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) - - if (config$y_var == "delta_bg" && config$plot_type == "scatter") { - plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") + - geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + - geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) - } else if (config$error_bar %||% FALSE) { - plot <- plot + - geom_point(shape = 3, size = 0.2) + - geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + - geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) - } + plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), + ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) } - # Plot type selection - plot <- switch(config$plot_type, - "box" = plot + geom_boxplot(), - "density" = plot + geom_density(), - "bar" = plot + geom_bar(stat = "identity"), - plot + geom_point() + geom_smooth(method = "lm", se = FALSE)) - - # Adding y-limits if provided + # Apply y-limits if provided if (!is.null(config$ylim_vals)) { plot <- plot + coord_cartesian(ylim = config$ylim_vals) } - # Setting legend position, titles, and labels - legend_position <- config$legend_position %||% "bottom" - plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position) - - if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label) - if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label) - - # Adding text annotations if provided + # Apply labels, titles, and legends + plot <- plot + ggtitle(config$title) + theme_publication(legend_position = config$legend_position %||% "bottom") + + if (!is.null(config$x_label)) xlab(config$x_label) else NULL + + if (!is.null(config$y_label)) ylab(config$y_label) else NULL + + # Add annotations if available if (!is.null(config$annotations)) { - for (annotation in config$annotations) { - plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label) - } + plot <- plot + geom_text(aes(x = config$annotations$x, y = config$annotations$y, label = config$annotations$label)) } - + return(plot) }) - - # If grid_layout is provided, arrange plots in a grid and save in a single PDF - if (!is.null(grid_layout)) { - pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) - - # Loop through plots in chunks defined by ncol and nrow - for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) { - end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots)) - plot_subset <- plots[start_idx:end_idx] - - # Arrange plots in a grid - do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow)) - } - - dev.off() - - plotly_plots <- lapply(plots, function(plot) { - suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))) - }) - combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05) - saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) - - } else { - # Save individual plots to PDF - pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) - lapply(plots, print) - dev.off() - - # Convert plots to plotly and save as HTML - plotly_plots <- lapply(plots, function(plot) { - suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))) - }) - combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05) - saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) - } + + # Save the plots + pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) + lapply(plots, print) + dev.off() + + plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))) + + # Handle grid layout + combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05) + saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) } generate_interaction_plot_configs <- function(df, variables) { - plot_configs <- list() + configs <- list() for (variable in variables) { # Define the y-limits based on the variable being plotted @@ -493,7 +439,7 @@ generate_interaction_plot_configs <- function(df, variables) { ) # Add scatter plot configuration for this variable - plot_configs[[length(plot_configs) + 1]] <- list( + configs[[length(configs) + 1]] <- list( df = df, x_var = "conc_num_factor", y_var = delta_var, @@ -511,7 +457,7 @@ generate_interaction_plot_configs <- function(df, variables) { ) # Add box plot configuration for this variable - plot_configs[[length(plot_configs) + 1]] <- list( + configs[[length(configs) + 1]] <- list( df = df, x_var = "conc_num_factor", y_var = variable, @@ -526,7 +472,7 @@ generate_interaction_plot_configs <- function(df, variables) { ) } - return(plot_configs) + return(configs) } # Adjust missing values and calculate ranks @@ -568,6 +514,8 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) ) ) + + return(configs) } # Non-annotated version (_notext) @@ -588,14 +536,14 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA } generate_correlation_plot_configs <- function(df, variables) { - plot_configs <- list() + configs <- list() for (variable in variables) { z_lm_var <- paste0("Z_lm_", variable) avg_zscore_var <- paste0("Avg_Zscore_", variable) lm_r_squared_col <- paste0("lm_R_squared_", variable) - plot_configs[[length(plot_configs) + 1]] <- list( + configs[[length(configs) + 1]] <- list( df = df, x_var = avg_zscore_var, y_var = z_lm_var, @@ -607,7 +555,7 @@ generate_correlation_plot_configs <- function(df, variables) { ) } - return(plot_configs) + return(configs) } main <- function() { @@ -647,21 +595,22 @@ main <- function() { df_na_stats <- ss$df_with_stats write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) - print("Summary stats:") - print(head(summary_stats), width = 200) + # print("Summary stats:") + # print(head(summary_stats), width = 200) # Remove rows with 0 values in L df_no_zeros <- df_na %>% filter(L > 0) # Additional filtering for non-finite values + # Filter and print non-finite rows, then filter and keep only finite rows df_na_filtered <- df_na %>% - filter(if_any(c(L), ~ !is.finite(.))) %>% { - if (nrow(.) > 0) { + non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.))) + if (nrow(non_finite_rows) > 0) { message("Removing non-finite rows:\n") - print(head(., n = 10)) + print(head(non_finite_rows, n = 10)) } - df_na %>% filter(if_all(c(L), is.finite)) + filter(., if_all(c(L), is.finite)) } # Filter data within and outside 2SD @@ -871,11 +820,11 @@ main <- function() { # Calculate interactions variables <- c("L", "K", "r", "AUC") message("Calculating interaction scores") - print("Reference strain:") - print(head(reference_strain)) + # print("Reference strain:") + # print(head(reference_strain)) reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables) - print("Deletion strains:") - print(head(deletion_strains)) + # print("Deletion strains:") + # print(head(deletion_strains)) deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables) zscores_calculations_reference <- reference_results$calculations @@ -952,7 +901,7 @@ main <- function() { generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K") ) generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", - plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2)) + plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) rank_lm_plot_config <- c( generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),