Squashed initial commit

This commit is contained in:
2024-09-10 13:47:29 -04:00
commit 8ebb6ad265
6221 changed files with 2512206 additions and 0 deletions

View File

@@ -0,0 +1,895 @@
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("extrafont")
library("grid")
library("ggthemes")
args <- commandArgs(TRUE)
exp1_name <- args[1]
exp1_file <- args[2]
exp2_name <- args[3]
exp2_file <- args[4]
output_dir <- args[5]
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
# Pairwise L
# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
# Theme elements for plots
theme_Publication <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size = unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_Publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
scale_colour_Publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
gg <- ggplot(data = X, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Ontology_Avg_X1,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
color = "grey20",
size = 0.25,
alpha = 0.1,
inherit.aes = FALSE,
fill = NA
) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
htmlwidgets::saveWidget(pgg, fname)
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
gg <- ggplot(data = X, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
color = "grey20",
size = 0.25,
alpha = 0.1,
inherit.aes = FALSE,
fill = NA
) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
#3
gg <- ggplot(data = x_rem2_gene, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
htmlwidgets::saveWidget(pgg, fname)
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]
gg <- ggplot(data = X_overlap_nothresold, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
# Only output GTA terms where average score is still above 2 after subtracting the SD
# Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
#5
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
htmlwidgets::saveWidget(pgg, fname)
#6
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
width = 20,
height = 20
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
htmlwidgets::saveWidget(pgg, fname)
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random")
#7
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
width = 15,
height = 15
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
htmlwidgets::saveWidget(pgg, fname)
#8
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_L_Avg_X1,
y = Z_lm_L_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
width = 15,
height = 15
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
htmlwidgets::saveWidget(pgg, fname)
write.csv(
x = X,
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
write.csv(
x = X_abovethreshold,
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
# Begin Pairwsise K
# Theme elements for plots
theme_Publication <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size = unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_Publication <- function(...) {
library("scales")
discrete_scale(
"fill",
"Publication",
manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
...
)
}
scale_colour_Publication <- function(...) {
discrete_scale(
"colour",
"Publication",
manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
...
)
}
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
)
}
scale_fill_Publication <- function(...) {
discrete_scale(
"fill",
"Publication",
manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
...
)
}
scale_colour_Publication <- function(...) {
discrete_scale(
"colour",
"Publication",
manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
...
)
}
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
#1
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
gg <- ggplot(data = X, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Ontology_Avg_X1,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
htmlwidgets::saveWidget(pgg, fname)
#2
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ]
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
"Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
"Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
gg <- ggplot(data = X, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#2
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
#3
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
gg <- ggplot(data = x_rem2_gene, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
htmlwidgets::saveWidget(pgg, fname)
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]
gg <- ggplot(data = X_overlap_nothresold, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap_Avg,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
#5
# Only output GTA terms where average score is still above 2 after subtracting the SD
# Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
"Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
"Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
width = 12,
height = 8
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
htmlwidgets::saveWidget(pgg, fname)
#6
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
width = 20,
height = 20
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
htmlwidgets::saveWidget(pgg, fname)
#7
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2, ties.method = "random")
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
width = 15,
height = 15
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
htmlwidgets::saveWidget(pgg, fname)
#8
gg <- ggplot(data = X_abovethreshold, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
color = Overlap,
Term = Term_Avg,
Genes = Genes_Avg_X1,
NumGenes = NumGenes_Avg_X1,
AllPossibleGenes = AllPossibleGenes_Avg_X1,
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
width = 15,
height = 15
)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
htmlwidgets::saveWidget(pgg, fname)
write.csv(
x = X,
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
write.csv(
x = X_abovethreshold,
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)