suppressMessages({ library("ggplot2") library("plotly") library("htmlwidgets") library("extrafont") library("grid") library("ggthemes") }) # Constants for configuration PLOT_WIDTH <- 12 PLOT_HEIGHT <- 8 BASE_SIZE <- 14 options(warn = 2, max.print = 100) # Parse arguments parse_arguments <- function() { if (interactive()) { args <- c( "Exp1", "/path/to/exp1_file.csv", "Exp2", "/path/to/exp2_file.csv", "/path/to/output_dir" ) } else { args <- commandArgs(trailingOnly = TRUE) } list( exp1_name = args[1], exp1_file = normalizePath(args[2], mustWork = TRUE), exp2_name = args[3], exp2_file = normalizePath(args[4], mustWork = TRUE), output_dir = normalizePath(args[5], mustWork = FALSE) ) } args <- parse_arguments() # Create output directories if they don't exist pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name)) pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name)) dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE) dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE) # Define themes and scales theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.line = element_line(colour = "black"), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.position = "bottom", legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) } theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { theme_publication(base_size, base_family) + theme( legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm") ) } scale_fill_publication <- function(...) { discrete_scale("fill", "Publication", manual_pal(values = c( "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" )), ...) } scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal(values = c( "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" )), ...) } # Read and merge data load_and_merge_data <- function(file1, file2) { df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE) df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE) merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2")) } # Generate a plot and save to PDF and HTML generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) { gg <- ggplot(data = data, aes_string( x = x_var, y = y_var, color = color_var )) + xlab(paste("GO Term Avg lm Z for", args$exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", args$exp2_name)) + ggtitle(title) + theme_function() pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT) print(gg) dev.off() pgg <- ggplotly(gg) htmlwidgets::saveWidget(pgg, file.path(file_path, ".html")) } # Identify and annotate specific interactions annotate_interactions <- function(df, exp1_name, exp2_name, suffix) { df$Overlap_Avg <- NA interactions <- list( "df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2), "df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2), "df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2), "df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2), "Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2), "Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2), "df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2), "df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2) ) annotation <- list( df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"), df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"), df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"), df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"), Overlap_Aggravators = "Overlapping_Deletion_Enhancers", Overlap_Alleviators = "Overlapping_Deletion_Suppressors", df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"), df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_") ) for (key in names(interactions)) { try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]]) } df } # Rank and filter data rank_and_filter_data <- function(df, suffix) { z1 <- df z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]] z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]] z2 <- df z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]] z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]] df_above_threshold <- df[!is.na(df$Overlap_Avg), ] df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random") df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random") list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold) } # Main execution function for a pairwise comparison run_pairwise_comparison <- function(suffix, dir) { df <- load_and_merge_data(args$exp1_file, args$exp2_file) # Generate initial ontology-based plot generate_plot(df, paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1", paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name), file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")), theme_publication_legend_right) # Annotate interactions and generate overlap-based plot df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix) ranks <- rank_and_filter_data(df, suffix) generate_plot(df, paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Overlap_Avg", paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name), file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")), theme_publication_legend_right) generate_plot(ranks$df_above_threshold, paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Overlap_Avg", paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"), file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")), theme_publication_legend_right) # Save CSV files write.csv(df, file.path(dir, paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")), row.names = FALSE) write.csv(ranks$df_above_threshold, file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")), row.names = FALSE) } # Execute Pairwise L and Pairwise K comparisons run_pairwise_comparison("L", pairDirL) run_pairwise_comparison("K", pairDirK)