210 lines
8.8 KiB
R
210 lines
8.8 KiB
R
suppressMessages({
|
|
library("ggplot2")
|
|
library("plotly")
|
|
library("htmlwidgets")
|
|
library("extrafont")
|
|
library("grid")
|
|
library("ggthemes")
|
|
})
|
|
|
|
# Constants for configuration
|
|
PLOT_WIDTH <- 12
|
|
PLOT_HEIGHT <- 8
|
|
BASE_SIZE <- 14
|
|
|
|
options(warn = 2, max.print = 100)
|
|
|
|
# Parse arguments
|
|
parse_arguments <- function() {
|
|
if (interactive()) {
|
|
args <- c(
|
|
"Exp1",
|
|
"/path/to/exp1_file.csv",
|
|
"Exp2",
|
|
"/path/to/exp2_file.csv",
|
|
"/path/to/output_dir"
|
|
)
|
|
} else {
|
|
args <- commandArgs(trailingOnly = TRUE)
|
|
}
|
|
list(
|
|
exp1_name = args[1],
|
|
exp1_file = normalizePath(args[2], mustWork = TRUE),
|
|
exp2_name = args[3],
|
|
exp2_file = normalizePath(args[4], mustWork = TRUE),
|
|
output_dir = normalizePath(args[5], mustWork = FALSE)
|
|
)
|
|
}
|
|
|
|
args <- parse_arguments()
|
|
|
|
# Create output directories if they don't exist
|
|
pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
|
|
pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
|
|
dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
|
|
dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
|
|
|
|
# Define themes and scales
|
|
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
|
|
theme_foundation(base_size = base_size, base_family = base_family) +
|
|
theme(
|
|
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
|
|
panel.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold", size = rel(1)),
|
|
axis.line = element_line(colour = "black"),
|
|
panel.grid.major = element_line(colour = "#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
legend.position = "bottom",
|
|
legend.direction = "horizontal",
|
|
legend.key.size = unit(0.2, "cm"),
|
|
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
|
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
|
strip.text = element_text(face = "bold")
|
|
)
|
|
}
|
|
|
|
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
|
|
theme_publication(base_size, base_family) +
|
|
theme(
|
|
legend.position = "right",
|
|
legend.direction = "vertical",
|
|
legend.key.size = unit(0.5, "cm")
|
|
)
|
|
}
|
|
|
|
scale_fill_publication <- function(...) {
|
|
discrete_scale("fill", "Publication", manual_pal(values = c(
|
|
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
|
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
|
)), ...)
|
|
}
|
|
|
|
scale_colour_publication <- function(...) {
|
|
discrete_scale("colour", "Publication", manual_pal(values = c(
|
|
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
|
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
|
)), ...)
|
|
}
|
|
|
|
# Read and merge data
|
|
load_and_merge_data <- function(file1, file2) {
|
|
df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
|
|
df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
|
|
merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
|
|
}
|
|
|
|
# Generate a plot and save to PDF and HTML
|
|
generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
|
|
gg <- ggplot(data = data, aes_string(
|
|
x = x_var,
|
|
y = y_var,
|
|
color = color_var
|
|
)) +
|
|
xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
|
|
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
|
geom_point(shape = 3) +
|
|
ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
|
|
ggtitle(title) +
|
|
theme_function()
|
|
|
|
pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
|
|
print(gg)
|
|
dev.off()
|
|
|
|
pgg <- ggplotly(gg)
|
|
htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
|
|
}
|
|
|
|
# Identify and annotate specific interactions
|
|
annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
|
|
df$Overlap_Avg <- NA
|
|
interactions <- list(
|
|
"df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
|
|
"df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
|
|
"df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
|
|
"df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
|
|
"Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
|
|
"Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
|
|
"df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
|
|
"df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
|
|
)
|
|
annotation <- list(
|
|
df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
|
|
df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
|
|
df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
|
|
df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
|
|
Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
|
|
Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
|
|
df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
|
|
df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
|
|
)
|
|
for (key in names(interactions)) {
|
|
try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
|
|
}
|
|
df
|
|
}
|
|
|
|
# Rank and filter data
|
|
rank_and_filter_data <- function(df, suffix) {
|
|
z1 <- df
|
|
z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
|
|
z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
|
|
|
|
z2 <- df
|
|
z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
|
|
z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
|
|
|
|
df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
|
|
df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
|
|
df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
|
|
|
|
list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
|
|
}
|
|
|
|
# Main execution function for a pairwise comparison
|
|
run_pairwise_comparison <- function(suffix, dir) {
|
|
df <- load_and_merge_data(args$exp1_file, args$exp2_file)
|
|
|
|
# Generate initial ontology-based plot
|
|
generate_plot(df,
|
|
paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
|
|
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
|
|
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
|
|
theme_publication_legend_right)
|
|
|
|
# Annotate interactions and generate overlap-based plot
|
|
df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
|
|
ranks <- rank_and_filter_data(df, suffix)
|
|
|
|
generate_plot(df,
|
|
paste0("Z_lm_", suffix, "_Avg_df1"),
|
|
paste0("Z_lm_", suffix, "_Avg_df2"),
|
|
"Overlap_Avg",
|
|
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
|
|
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
|
|
theme_publication_legend_right)
|
|
|
|
generate_plot(ranks$df_above_threshold,
|
|
paste0("Z_lm_", suffix, "_Avg_df1"),
|
|
paste0("Z_lm_", suffix, "_Avg_df2"),
|
|
"Overlap_Avg",
|
|
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
|
|
file.path(dir,
|
|
paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
|
|
theme_publication_legend_right)
|
|
|
|
# Save CSV files
|
|
write.csv(df, file.path(dir,
|
|
paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
|
|
row.names = FALSE)
|
|
write.csv(ranks$df_above_threshold,
|
|
file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
|
|
row.names = FALSE)
|
|
}
|
|
|
|
# Execute Pairwise L and Pairwise K comparisons
|
|
run_pairwise_comparison("L", pairDirL)
|
|
run_pairwise_comparison("K", pairDirK)
|