diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 8a8e2959..d2f3bce6 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -119,8 +119,7 @@ load_and_process_data <- function(easy_results_file, sd = 3) { # Rename columns rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>% mutate( - across(c(Col, Row, num), as.numeric), - across(c(L, K, r, scan, AUC, last_bg, first_bg), as.numeric), + across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric), delta_bg = last_bg - first_bg, delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)), NG = if_else(L == 0 & !is.na(L), 1, 0), @@ -300,99 +299,101 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c } -generate_and_save_plots <- function(output_dir, file_name, plot_configs) { - +# Main function +generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { + `%||%` <- function(a, b) if (!is.null(a)) a else b - + # Generalized plot generation plots <- lapply(plot_configs, function(config) { - # Use the dataframe from the plot configuration df <- config$df - - # Initialize the basic ggplot plot <- ggplot(df, aes(x = if (config$plot_type == "box") as.factor(!!sym(config$x_var)) else !!sym(config$x_var), color = as.factor(!!sym(config$color_var)))) - - # Handle specific y_var cases, like "delta_bg" + if (!is.null(config$y_var)) { plot <- plot + aes(y = !!sym(config$y_var)) - + y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) - - # Special case for "delta_bg" to add jitter + if (config$y_var == "delta_bg" && config$plot_type == "scatter") { plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") + - geom_errorbar(aes( - ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), - width = 0.1) + + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), + ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) } else if (config$error_bar %||% FALSE) { - # Directly use geom_point and geom_errorbar with pre-calculated values plot <- plot + geom_point(shape = 3, size = 0.2) + - geom_errorbar(aes( - ymin = !!sym(y_mean_col) - !!sym(y_sd_col), - ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), - width = 0.1) + + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), + ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) } } - - # Switch plot type (scatter, box, density, bar) + plot <- switch(config$plot_type, - "box" = plot + geom_boxplot(), - "density" = plot + geom_density(), - "bar" = plot + geom_bar(stat = "identity"), - plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter - ) - - # Apply y-axis limits if provided + "box" = plot + geom_boxplot(), + "density" = plot + geom_density(), + "bar" = plot + geom_bar(stat = "identity"), + plot + geom_point() + geom_smooth(method = "lm", se = FALSE)) + if (!is.null(config$ylim_vals)) { plot <- plot + coord_cartesian(ylim = config$ylim_vals) } - - # Add title and publication theme with custom legend position - legend_position <- config$legend_position %||% "bottom" # Default to bottom if not specified + + legend_position <- config$legend_position %||% "bottom" plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position) - - # Add x and y axis labels if provided + if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label) if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label) - - # Add custom annotations if provided + if (!is.null(config$annotations)) { for (annotation in config$annotations) { - plot <- plot + annotate("text", - x = annotation$x, - y = annotation$y, - label = annotation$label) + plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label) } } - + return(plot) }) - - # Save plots to PDF - pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) - lapply(plots, print) - dev.off() - - # Convert plots to plotly and combine them using subplot - plotly_plots <- lapply(plots, function(plot) { - suppressWarnings(ggplotly(plot) %>% - layout(legend = list(orientation = "h"))) - }) - # Combine all plotly plots into a single interactive layout using subplot - combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05) - - # Save the combined HTML file - saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) + # If grid_layout is provided, arrange plots in a grid and save in a single PDF + if (!is.null(grid_layout)) { + pdf(file.path(output_dir, paste0(file_name, "_grid.pdf")), width = 14, height = 9) + + # Loop through plots in chunks defined by ncol and nrow + for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) { + end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots)) + plot_subset <- plots[start_idx:end_idx] + + # Arrange plots in a grid + do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow)) + } + + dev.off() + + # Save as HTML (optional) + plotly_plots <- lapply(plots, function(plot) { + suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))) + }) + combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05) + saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, "_grid.html")), selfcontained = TRUE) + + } else { + # Save individual plots to PDF + pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) + lapply(plots, print) + dev.off() + + # Convert plots to plotly and save as HTML + plotly_plots <- lapply(plots, function(plot) { + suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))) + }) + combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05) + saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE) + } } + interaction_plot_configs <- function(df, variables) { plot_configs <- list() @@ -428,18 +429,18 @@ interaction_plot_configs <- function(df, variables) { # Append a new plot configuration for each variable plot_configs[[length(plot_configs) + 1]] <- list( df = df, - x_var = "Conc_Num_Factor", + x_var = "conc_num_factor", y_var = delta_var, plot_type = "scatter", - title = paste(df$OrfRep[1], df$Gene[1], sep = " "), + title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]), ylim_vals = ylim_vals, annotations = annotations, error_bar = list( ymin = 0 - (2 * df[[wt_sd_col]][1]), ymax = 0 + (2 * df[[wt_sd_col]][1]) ), - x_breaks = unique(df$Conc_Num_Factor), - x_labels = unique(as.character(df$Conc_Num)), + x_breaks = unique(df$conc_num_factor), + x_labels = unique(as.character(df$conc_num)), x_label = unique(df$Drug[1]) ) } @@ -447,9 +448,6 @@ interaction_plot_configs <- function(df, variables) { return(plot_configs) } - - - correlation_plot_configs <- function(df, lm_list, lm_summaries) { lapply(seq_along(lm_list), function(i) { r_squared <- round(lm_summaries[[i]]$r.squared, 3) @@ -555,6 +553,9 @@ main <- function() { # Plot configurations # Each list corresponds to a group of plots in the same file + + # Print quality control graphs before removing data due to contamination and + # adjusting missing data to max theoretical values l_vs_k_plots <- list( list(df = df, x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", @@ -672,74 +673,6 @@ main <- function() { generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) - - # Print quality control graphs before removing data due to contamination and - # adjusting missing data to max theoretical values - - # list(x_var = "delta_bg", y_var = NULL, plot_type = "density", - # title = "Density plot for Delta Background", - # ylim_vals = NULL), - # list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar", - # title = "Bar plot for Delta Background", - # ylim_vals = NULL) - - above_tolerance_plot_configs <- list( - list(x_var = "L", y_var = "K", plot_type = "scatter", - title = "Raw L vs K for strains above delta background threshold", ylim_vals = NULL) - ) - - after_qc_plot_configs <- list( - list(x_var = "L", y_var = "K", plot_type = "scatter", - title = "L vs K After QC Filtering", ylim_vals = NULL) - ) - - no_zeros_plot_configs <- list( - list(x_var = "scan", y_var = "L", plot_type = "scatter", - title = "Plate analysis by Drug Conc for L after quality control", - file_name = "Plate_Analysis_L_afterQC_Z", error_bar = TRUE, color_var = "conc_num"), - list(x_var = "scan", y_var = "K", plot_type = "scatter", - title = "Plate analysis by Drug Conc for K after quality control", - file_name = "Plate_Analysis_K_afterQC_Z", error_bar = TRUE, color_var = "conc_num"), - list(x_var = "scan", y_var = "r", plot_type = "scatter", - title = "Plate analysis by Drug Conc for r after quality control", - file_name = "Plate_Analysis_r_afterQC_Z", error_bar = TRUE, color_var = "conc_num"), - list(x_var = "scan", y_var = "AUC", plot_type = "scatter", - title = "Plate analysis by Drug Conc for AUC after quality control", - file_name = "Plate_Analysis_AUC_afterQC_Z", error_bar = TRUE, color_var = "conc_num"), - list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter", - title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control", - file_name = "Plate_Analysis_Delta_Backgrd_afterQC_Z", error_bar = TRUE, color_var = "conc_num"), - - # Box plots - list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box", - title = "Plate analysis by Drug Conc for L after quality control", - file_name = "Plate_Analysis_L_Box_afterQC_Z"), - list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box", - title = "Plate analysis by Drug Conc for K after quality control", - file_name = "Plate_Analysis_K_Box_afterQC_Z"), - list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box", - title = "Plate analysis by Drug Conc for r after quality control", - file_name = "Plate_Analysis_r_Box_afterQC_Z"), - list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box", - title = "Plate analysis by Drug Conc for AUC after quality control", - file_name = "Plate_Analysis_AUC_Box_afterQC_Z"), - list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box", - title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control", - file_name = "Plate_Analysis_Delta_Backgrd_Box_afterQC_Z") - ) - - - - - - - - # generate_and_save_plots(df, out_dir_qc, "raw_L_vs_K_before_QC.pdf", delta_bg_plots) - # generate_and_save_plots(df, out_dir_qc, "plate_analysis", before_qc_plot_configs) - # generate_and_save_plots(df_above_tolerance, out_dir_qc, above_tolerance_plot_configs) - # generate_and_save_plots(df_na_filtered, out_dir_qc, after_qc_plot_configs) - # generate_and_save_plots(df_no_zeros, out_dir_qc, "plate_analysis_no_zeros", no_zeros_plot_configs) - # Clean up rm(df, df_above_tolerance, df_no_zeros) @@ -773,9 +706,6 @@ main <- function() { write.csv(summary_stats_bg, file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), row.names = FALSE) - - #print("Background summary stats:") - #print(head(summary_stats_bg)) # Filter reference and deletion strains # Formerly X2_RF (reference strains) @@ -831,10 +761,10 @@ main <- function() { write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) # Create interaction plots - deletion_plot_configs <- interaction_plot_configs(df_reference, variables) - generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs) + reference_plot_configs <- interaction_plot_configs(df_reference, variables) deletion_plot_configs <- interaction_plot_configs(df_deletion, variables) - generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs) + generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) + generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) # Define conditions for enhancers and suppressors # TODO Add to study config file?