123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209 |
- suppressMessages({
- library("ggplot2")
- library("plotly")
- library("htmlwidgets")
- library("extrafont")
- library("grid")
- library("ggthemes")
- })
- # Constants for configuration
- PLOT_WIDTH <- 12
- PLOT_HEIGHT <- 8
- BASE_SIZE <- 14
- options(warn = 2, max.print = 100)
- # Parse arguments
- parse_arguments <- function() {
- if (interactive()) {
- args <- c(
- "Exp1",
- "/path/to/exp1_file.csv",
- "Exp2",
- "/path/to/exp2_file.csv",
- "/path/to/output_dir"
- )
- } else {
- args <- commandArgs(trailingOnly = TRUE)
- }
- list(
- exp1_name = args[1],
- exp1_file = normalizePath(args[2], mustWork = TRUE),
- exp2_name = args[3],
- exp2_file = normalizePath(args[4], mustWork = TRUE),
- output_dir = normalizePath(args[5], mustWork = FALSE)
- )
- }
- args <- parse_arguments()
- # Create output directories if they don't exist
- pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
- pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
- dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
- dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
- # Define themes and scales
- theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
- theme_foundation(base_size = base_size, base_family = base_family) +
- theme(
- plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
- panel.background = element_rect(colour = NA),
- plot.background = element_rect(colour = NA),
- panel.border = element_rect(colour = NA),
- axis.title = element_text(face = "bold", size = rel(1)),
- axis.line = element_line(colour = "black"),
- panel.grid.major = element_line(colour = "#f0f0f0"),
- panel.grid.minor = element_blank(),
- legend.position = "bottom",
- legend.direction = "horizontal",
- legend.key.size = unit(0.2, "cm"),
- plot.margin = unit(c(10, 5, 5, 5), "mm"),
- strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
- strip.text = element_text(face = "bold")
- )
- }
- theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
- theme_publication(base_size, base_family) +
- theme(
- legend.position = "right",
- legend.direction = "vertical",
- legend.key.size = unit(0.5, "cm")
- )
- }
- scale_fill_publication <- function(...) {
- discrete_scale("fill", "Publication", manual_pal(values = c(
- "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
- "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
- )), ...)
- }
- scale_colour_publication <- function(...) {
- discrete_scale("colour", "Publication", manual_pal(values = c(
- "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
- "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
- )), ...)
- }
- # Read and merge data
- load_and_merge_data <- function(file1, file2) {
- df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
- df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
- merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
- }
- # Generate a plot and save to PDF and HTML
- generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
- gg <- ggplot(data = data, aes_string(
- x = x_var,
- y = y_var,
- color = color_var
- )) +
- xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
- geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
- geom_point(shape = 3) +
- ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
- ggtitle(title) +
- theme_function()
- pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
- print(gg)
- dev.off()
- pgg <- ggplotly(gg)
- htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
- }
- # Identify and annotate specific interactions
- annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
- df$Overlap_Avg <- NA
- interactions <- list(
- "df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
- "df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
- "df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
- "df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
- "Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
- "Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
- "df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
- "df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
- )
- annotation <- list(
- df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
- df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
- df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
- df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
- Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
- Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
- df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
- df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
- )
- for (key in names(interactions)) {
- try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
- }
- df
- }
- # Rank and filter data
- rank_and_filter_data <- function(df, suffix) {
- z1 <- df
- z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
- z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
-
- z2 <- df
- z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
- z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
-
- df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
- df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
- df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
-
- list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
- }
- # Main execution function for a pairwise comparison
- run_pairwise_comparison <- function(suffix, dir) {
- df <- load_and_merge_data(args$exp1_file, args$exp2_file)
-
- # Generate initial ontology-based plot
- generate_plot(df,
- paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
- paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
- file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
- theme_publication_legend_right)
- # Annotate interactions and generate overlap-based plot
- df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
- ranks <- rank_and_filter_data(df, suffix)
-
- generate_plot(df,
- paste0("Z_lm_", suffix, "_Avg_df1"),
- paste0("Z_lm_", suffix, "_Avg_df2"),
- "Overlap_Avg",
- paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
- file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
- theme_publication_legend_right)
- generate_plot(ranks$df_above_threshold,
- paste0("Z_lm_", suffix, "_Avg_df1"),
- paste0("Z_lm_", suffix, "_Avg_df2"),
- "Overlap_Avg",
- paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
- file.path(dir,
- paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
- theme_publication_legend_right)
-
- # Save CSV files
- write.csv(df, file.path(dir,
- paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
- row.names = FALSE)
- write.csv(ranks$df_above_threshold,
- file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
- row.names = FALSE)
- }
- # Execute Pairwise L and Pairwise K comparisons
- run_pairwise_comparison("L", pairDirL)
- run_pairwise_comparison("K", pairDirK)
|