Rollup before parallelization

This commit is contained in:
2024-08-14 23:20:29 -04:00
parent 1ba1f14537
commit 6992d5eec0
8 changed files with 2517 additions and 2434 deletions

View File

@@ -8,17 +8,16 @@
# @arg $2 string gene_ontology_edit.obo file
# @arg $3 string go_terms.tab file
# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
# @arg $5 string ZScores_interaction.csv
# @arg $6 string base directory
# @arg $7 string output directory
# @arg $5 string base directory
# @arg $6 string output directory
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
# library("plotly")
# library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
@@ -31,10 +30,9 @@ study_info_file <- args[1]
ontology_file <- args[2]
sgd_terms_tfile <- args[3]
all_sgd_terms_csv <- args[4]
zscores_file <- args[5]
base_dir <- args[6]
output_dir <- args[7]
study_nums <- args[8:length(args)]
base_dir <- args[5]
output_dir <- args[6]
study_nums <- args[7:length(args)]
# Import standard tables used in Sean's code that should be copied to each ExpStudy
labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
@@ -52,7 +50,7 @@ XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2])
# Load input files
for (study_num in study_nums) {
input_file <- file.path(base_dir, paste("Exp", study_num), zscores_file)
input_file <- file.path(base_dir, paste("Exp", study_num), zscores, "zscores_interaction.csv")
if (file.exists(input_file)) {
assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE))
assign(paste(Name, study_num), labels[study_num, 2])
@@ -206,10 +204,10 @@ if (length(study_nums) > 1) {
try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
X_heatmap <-
X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)]
colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
@@ -226,12 +224,12 @@ if (length(study_nums) > 2) {
X_heatmap <-
X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]
# Reorder columns
X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)]
@@ -252,14 +250,14 @@ if (length(study_nums) > 3) {
X_heatmap <-
X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]
# Reorder columns
X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)]
@@ -283,16 +281,16 @@ if (length(study_nums) > 4) {
X_heatmap <-
X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
# Reorder columns
X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)]
@@ -441,7 +439,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size > 2000) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 45, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 45,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -461,7 +466,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -470,7 +475,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 1000 && Parent_Size <= 2000) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 35, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 35,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -490,7 +502,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -499,7 +511,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 500 && Parent_Size <= 1000) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 30, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 30,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -519,7 +538,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -528,7 +547,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 200 && Parent_Size <= 500) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 25, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 25,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -548,7 +574,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -557,7 +583,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 100 && Parent_Size <= 200) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 20, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 20,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -577,7 +610,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -586,7 +619,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 60 && Parent_Size <= 100) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 15, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 15,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -606,7 +646,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -615,7 +655,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 30 && Parent_Size <= 60) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 10, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 10,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -650,7 +697,7 @@ for (s in 1:dim(XX3)[1]) {
keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = GO_Term_Name,
#ColSideColors = ev_repeat,
# ColSideColors = ev_repeat,
labRow = as.character(Genes_Annotated_to_Term$Gene)
))
}
@@ -660,7 +707,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size >= 3 && Parent_Size <= 30) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 7,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -704,7 +758,14 @@ for (s in 1:dim(XX3)[1]) {
}
if (Parent_Size == 2) {
pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE)
pdf(
file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
width = 12,
height = 7,
onefile = TRUE
)
for (i in 1:length(GOTerm_parent)) {
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])