# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments library("ggplot2") library("plotly") library("htmlwidgets") library("extrafont") library("grid") library("ggthemes") args <- commandArgs(TRUE) exp1_name <- args[1] exp1_file <- args[2] exp2_name <- args[3] exp2_file <- args[4] output_dir <- args[5] pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name)) pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name)) # Pairwise L # outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/" # Theme elements for plots theme_Publication <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_Publication <- function(...) { discrete_scale("fill", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } scale_colour_Publication <- function(...) { discrete_scale("colour", "Publication", manual_pal( values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...) } X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE) X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE) X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2")) gg <- ggplot(data = X, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect( aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA ) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html")) htmlwidgets::saveWidget(pgg, fname) # ID aggravators and alleviators, regardless of whether they meet 2SD threshold X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ] X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ] X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ] X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ] Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ] Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ] X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ] X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ] X$Overlap_Avg <- NA try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers") try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors") try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")) gg <- ggplot(data = X, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect( aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA ) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html")) htmlwidgets::saveWidget(pgg, fname) x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ] #3 gg <- ggplot(data = x_rem2_gene, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html")) htmlwidgets::saveWidget(pgg, fname) #4 X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ] gg <- ggplot(data = X_overlap_nothresold, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html")) htmlwidgets::saveWidget(pgg, fname) # Only output GTA terms where average score is still above 2 after subtracting the SD # Z1 will ID aggravators, Z2 alleviators Z1 <- X Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1 Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2 Z2 <- X Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1 Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2 X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ] X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ] X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ] X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ] Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ] Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ] X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ] X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ] X$Overlap <- NA try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers") try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors") try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <- paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")) #5 X_abovethreshold <- X[!(is.na(X$Overlap)), ] gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html")) htmlwidgets::saveWidget(pgg, fname) #6 gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")), width = 20, height = 20 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html")) htmlwidgets::saveWidget(pgg, fname) X_abovethreshold$X1_Rank <- NA X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random") X_abovethreshold$X2_Rank <- NA X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random") #7 gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")), width = 15, height = 15 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html")) htmlwidgets::saveWidget(pgg, fname) #8 gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")), width = 15, height = 15 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html")) htmlwidgets::saveWidget(pgg, fname) write.csv( x = X, file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")), row.names = FALSE ) write.csv( x = X_abovethreshold, file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")), row.names = FALSE ) # Begin Pairwsise K # Theme elements for plots theme_Publication <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_Publication <- function(...) { library("scales") discrete_scale( "fill", "Publication", manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... ) } scale_colour_Publication <- function(...) { discrete_scale( "colour", "Publication", manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... ) } theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") { (theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.text = element_text(), axis.line = element_line(colour = "black"), axis.ticks = element_line(), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"), plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) ) } scale_fill_Publication <- function(...) { discrete_scale( "fill", "Publication", manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... ) } scale_colour_Publication <- function(...) { discrete_scale( "colour", "Publication", manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ... ) } X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE) X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE) #1 X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2")) gg <- ggplot(data = X, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Ontology_Avg_X1, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html")) htmlwidgets::saveWidget(pgg, fname) #2 # ID aggravators and alleviators, regardless of whether they meet 2SD threshold X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ] X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ] X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ] X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ] Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ] Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ] X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ] X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ] X$Overlap_Avg <- NA try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors") try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers") try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <- paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_")) plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html")) gg <- ggplot(data = X, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #2 fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html")) htmlwidgets::saveWidget(pgg, fname) #3 x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ] plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html")) gg <- ggplot(data = x_rem2_gene, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html")) htmlwidgets::saveWidget(pgg, fname) #4 X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ] gg <- ggplot(data = X_overlap_nothresold, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html")) htmlwidgets::saveWidget(pgg, fname) #5 # Only output GTA terms where average score is still above 2 after subtracting the SD # Z1 will ID aggravators, Z2 alleviators Z1 <- X Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1 Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2 Z2 <- X Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1 Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2 X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ] X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ] X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ] X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ] Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ] Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ] X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ] X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ] X$Overlap <- NA try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors") try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers") try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <- paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")) try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_")) X_abovethreshold <- X[!(is.na(X$Overlap)), ] gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")), width = 12, height = 8 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html")) htmlwidgets::saveWidget(pgg, fname) #6 gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")), width = 20, height = 20 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html")) htmlwidgets::saveWidget(pgg, fname) #7 X_abovethreshold$X1_Rank <- NA X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1, ties.method = "random") X_abovethreshold$X2_Rank <- NA X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2, ties.method = "random") gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")), width = 15, height = 15 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html")) htmlwidgets::saveWidget(pgg, fname) #8 gg <- ggplot(data = X_abovethreshold, aes( x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2 )) + xlab(paste("GO Term Avg lm Z for", exp1_name)) + geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) + geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) + geom_point(shape = 3, size = 3) + ylab(paste("GO Term Avg lm Z for", exp2_name)) + ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) + theme_Publication_legend_right() pdf( file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")), width = 15, height = 15 ) gg dev.off() pgg <- ggplotly(gg) #pgg fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html")) htmlwidgets::saveWidget(pgg, fname) write.csv( x = X, file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")), row.names = FALSE ) write.csv( x = X_abovethreshold, file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")), row.names = FALSE )