697 Zeilen
33 KiB
R
697 Zeilen
33 KiB
R
#!/usr/bin/env Rscript
|
|
# Makes heat maps of multiple experiments
|
|
#
|
|
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
|
# I tried to leave as much logic intact as possible, just feeding in vars in a better way
|
|
# NOTE: The script now has 7 required arguments and a variable number of input experiments
|
|
# @arg $1 string StudyInfo.csv file
|
|
# @arg $2 string gene_ontology_edit.obo file
|
|
# @arg $3 string go_terms.tab file
|
|
# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
|
|
# @arg $5 string ZScores_interaction.csv
|
|
# @arg $6 string base directory
|
|
# @arg $7 string output directory
|
|
|
|
library("ontologyIndex")
|
|
library("ggplot2")
|
|
library("RColorBrewer")
|
|
library("grid")
|
|
library("ggthemes")
|
|
#library("plotly")
|
|
#library("htmlwidgets")
|
|
library("extrafont")
|
|
library("stringr")
|
|
library("org.Sc.sgd.db")
|
|
library("ggrepel")
|
|
library("gplots")
|
|
|
|
# Load arguments
|
|
args <- commandArgs(TRUE)
|
|
study_info_file <- args[1]
|
|
ontology_file <- args[2]
|
|
sgd_terms_tfile <- args[3]
|
|
all_sgd_terms_csv <- args[4]
|
|
zscores_file <- args[5]
|
|
base_dir <- args[6]
|
|
output_dir <- args[7]
|
|
study_nums <- args[8:length(args)]
|
|
|
|
#import standard tables used in Sean's code That should be copied to each ExpStudy
|
|
labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE)
|
|
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
|
|
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
|
|
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
|
XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE)
|
|
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
|
|
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
|
|
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
|
|
|
|
# Load input files
|
|
for (study_num in study_nums) {
|
|
input_file <- file.path(base_dir,paste('Exp',study_num),zscores_file)
|
|
if (file.exists(input_file)) {
|
|
assign(paste(X, study_num), read.csv(file=input_file,stringsAsFactors=FALSE,header=TRUE))
|
|
assign(paste(Name, study_num), labels[study_num,2])
|
|
}
|
|
}
|
|
|
|
for (study_num in study_nums) {
|
|
eval(paste("function",study_num))
|
|
}
|
|
|
|
if (length(study_nums) > 0) {
|
|
X1$ORF <- X1$OrfRep
|
|
X1$ORF <- gsub("_1","",x=X1$ORF)
|
|
X1$ORF <- gsub("_2","",x=X1$ORF)
|
|
X1$ORF <- gsub("_3","",x=X1$ORF)
|
|
X1$ORF <- gsub("_4","",x=X1$ORF)
|
|
|
|
X1$Score_L <- "No Effect"
|
|
try(X1[is.na(X1$Z_lm_L),]$Score_L <- "No Growth")
|
|
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
|
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
|
|
|
X1$Score_K <- "No Effect"
|
|
try(X1[is.na(X1$Z_lm_K),]$Score_K <- "No Growth")
|
|
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
|
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
|
|
|
#express the na data as 0.001 in X1 for K and L
|
|
X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001
|
|
X1[is.na(X1$Z_lm_K),]$Z_lm_K <- 0.001
|
|
X1$Rank_L <- rank(X1$Z_lm_L)
|
|
X1$Rank_K <- rank(X1$Z_lm_K)
|
|
|
|
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
|
|
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
|
|
}
|
|
|
|
if (length(study_nums) > 1) {
|
|
X2$ORF <- X2$OrfRep
|
|
X2$ORF <- gsub("_1","",x=X2$ORF)
|
|
X2$ORF <- gsub("_2","",x=X2$ORF)
|
|
X2$ORF <- gsub("_3","",x=X2$ORF)
|
|
X2$ORF <- gsub("_4","",x=X2$ORF)
|
|
|
|
X2$Score_L <- "No Effect"
|
|
try(X2[is.na(X2$Z_lm_L),]$Score_L <- "No Growth")
|
|
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
|
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
|
|
|
X2$Score_K <- "No Effect"
|
|
try(X2[is.na(X2$Z_lm_K),]$Score_K <- "No Growth")
|
|
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
|
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
|
#express the na data as 0.001 in X2
|
|
X2[is.na(X2$Z_lm_L),]$Z_lm_L <- 0.001
|
|
X2[is.na(X2$Z_lm_K),]$Z_lm_K <- 0.001
|
|
|
|
X2$Rank_L <- rank(X2$Z_lm_L)
|
|
X2$Rank_K <- rank(X2$Z_lm_K)
|
|
|
|
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
|
|
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
|
|
X <- cbind(X1,X2)
|
|
}
|
|
|
|
if (length(study_nums) > 2) {
|
|
X3$ORF <- X3$OrfRep
|
|
X3$ORF <- gsub("_1","",x=X3$ORF)
|
|
X3$ORF <- gsub("_2","",x=X3$ORF)
|
|
X3$ORF <- gsub("_3","",x=X3$ORF)
|
|
X3$ORF <- gsub("_4","",x=X3$ORF)
|
|
|
|
X3$Score_L <- "No Effect"
|
|
try(X3[is.na(X3$Z_lm_L),]$Score_L <- "No Growth")
|
|
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
|
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
|
|
|
X3$Score_K <- "No Effect"
|
|
try(X3[is.na(X3$Z_lm_K),]$Score_K <- "No Growth")
|
|
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
|
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
|
#express the na data as 0.001 in X3
|
|
X3[is.na(X3$Z_lm_L),]$Z_lm_L <- 0.001
|
|
X3[is.na(X3$Z_lm_K),]$Z_lm_K <- 0.001
|
|
|
|
X3$Rank_L <- rank(X3$Z_lm_L)
|
|
X3$Rank_K <- rank(X3$Z_lm_K)
|
|
|
|
X3 <- X3[order(X3$OrfRep,decreasing = FALSE),]
|
|
colnames(X3) <- paste(colnames(X3),"_X3",sep="")
|
|
X <- cbind(X,X3)
|
|
}
|
|
|
|
if (length(study_nums) > 3) {
|
|
X4$ORF <- X4$OrfRep
|
|
X4$ORF <- gsub("_1","",x=X4$ORF)
|
|
X4$ORF <- gsub("_2","",x=X4$ORF)
|
|
X4$ORF <- gsub("_3","",x=X4$ORF)
|
|
X4$ORF <- gsub("_4","",x=X4$ORF)
|
|
|
|
X4$Score_L <- "No Effect"
|
|
try(X4[is.na(X4$Z_lm_L),]$Score_L <- "No Growth")
|
|
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
|
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
|
|
|
X4$Score_K <- "No Effect"
|
|
try(X4[is.na(X4$Z_lm_K),]$Score_K <- "No Growth")
|
|
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
|
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
|
#express the na data as 0.001 in X4
|
|
X4[is.na(X4$Z_lm_L),]$Z_lm_L <- 0.001
|
|
X4[is.na(X4$Z_lm_K),]$Z_lm_K <- 0.001
|
|
|
|
X4$Rank_L <- rank(X4$Z_lm_L)
|
|
X4$Rank_K <- rank(X4$Z_lm_K)
|
|
|
|
X4 <- X4[order(X4$OrfRep,decreasing = FALSE),]
|
|
colnames(X4) <- paste(colnames(X4),"_X4",sep="")
|
|
X <- cbind(X,X4)
|
|
}
|
|
|
|
if (length(study_nums) > 4) {
|
|
X5$ORF <- X5$OrfRep
|
|
X5$ORF <- gsub("_1","",x=X5$ORF)
|
|
X5$ORF <- gsub("_2","",x=X5$ORF)
|
|
X5$ORF <- gsub("_3","",x=X5$ORF)
|
|
X5$ORF <- gsub("_4","",x=X5$ORF)
|
|
|
|
X5$Score_L <- "No Effect"
|
|
try(X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth")
|
|
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
|
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
|
|
|
X5$Score_K <- "No Effect"
|
|
try(X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth")
|
|
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
|
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
|
#express the na data as 0.001 in X5
|
|
X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
|
|
X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
|
|
|
|
X5$Rank_L <- rank(X5$Z_lm_L)
|
|
X5$Rank_K <- rank(X5$Z_lm_K)
|
|
|
|
X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
|
|
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
|
|
X <- cbind(X,X5)
|
|
}
|
|
|
|
X$ORF <- X$OrfRep_X1
|
|
|
|
if (length(study_nums) > 1) {
|
|
|
|
X$ORF <- gsub("_1","",x=X$ORF)
|
|
|
|
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
|
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
|
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
|
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
|
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
|
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
|
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ]
|
|
|
|
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
|
|
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
|
colnames(X_heatmap)[2] <- "Gene"
|
|
}
|
|
|
|
|
|
if (length(study_nums) > 2) {
|
|
X$ORF <- gsub("_1","",x=X$ORF)
|
|
X$ORF <- gsub("_2","",x=X$ORF)
|
|
|
|
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
|
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
|
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
|
|
|
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
|
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
|
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
|
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
|
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
|
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
|
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ]
|
|
#Reorder columns
|
|
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three
|
|
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
|
colnames(X_heatmap)[2] <- "Gene"
|
|
}
|
|
|
|
if (length(study_nums) > 3) {
|
|
X$ORF <- gsub("_1","",x=X$ORF)
|
|
X$ORF <- gsub("_2","",x=X$ORF)
|
|
X$ORF <- gsub("_3","",x=X$ORF)
|
|
|
|
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
|
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
|
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
|
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
|
|
|
|
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
|
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
|
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
|
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
|
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
|
|
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
|
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
|
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
|
|
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ]
|
|
#Reorder columns
|
|
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four
|
|
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
|
|
colnames(X_heatmap)[2] <- "Gene"
|
|
}
|
|
|
|
|
|
if (length(study_nums) > 4) {
|
|
X$ORF <- gsub("_1","",x=X$ORF)
|
|
X$ORF <- gsub("_2","",x=X$ORF)
|
|
X$ORF <- gsub("_3","",x=X$ORF)
|
|
X$ORF <- gsub("_4","",x=X$ORF)
|
|
|
|
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
|
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
|
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
|
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
|
|
try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
|
|
|
|
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
|
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
|
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
|
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
|
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
|
|
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
|
|
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
|
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
|
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
|
|
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
|
|
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
|
|
#Reorder columns
|
|
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]
|
|
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
|
|
colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap))
|
|
|
|
colnames(X_heatmap)[2] <- "Gene"
|
|
}
|
|
|
|
|
|
#theme elements for plots
|
|
theme_Publication <- function(base_size=14, base_family="sans") {
|
|
(theme_foundation(base_size=base_size, base_family=base_family)
|
|
+ theme(plot.title = element_text(face = "bold",
|
|
size = rel(1.2), hjust = 0.5),
|
|
text = element_text(),
|
|
panel.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold",size = rel(1)),
|
|
axis.title.y = element_text(angle=90,vjust =2),
|
|
axis.title.x = element_text(vjust = -0.2),
|
|
axis.text = element_text(),
|
|
axis.line = element_line(colour="black"),
|
|
axis.ticks = element_line(),
|
|
panel.grid.major = element_line(colour="#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
legend.key = element_rect(colour = NA),
|
|
legend.position = "bottom",
|
|
legend.direction = "horizontal",
|
|
legend.key.size= unit(0.2, "cm"),
|
|
legend.spacing = unit(0, "cm"),
|
|
legend.title = element_text(face="italic"),
|
|
plot.margin=unit(c(10,5,5,5),"mm"),
|
|
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
|
strip.text = element_text(face="bold")
|
|
))
|
|
|
|
}
|
|
|
|
scale_fill_Publication <- function(...){
|
|
library(scales)
|
|
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
scale_colour_Publication <- function(...){
|
|
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
|
|
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
|
(theme_foundation(base_size=base_size, base_family=base_family)
|
|
+ theme(plot.title = element_text(face = "bold",
|
|
size = rel(1.2), hjust = 0.5),
|
|
text = element_text(),
|
|
panel.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold",size = rel(1)),
|
|
axis.title.y = element_text(angle=90,vjust =2),
|
|
axis.title.x = element_text(vjust = -0.2),
|
|
axis.text = element_text(),
|
|
axis.line = element_line(colour="black"),
|
|
axis.ticks = element_line(),
|
|
panel.grid.major = element_line(colour="#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
legend.key = element_rect(colour = NA),
|
|
legend.position = "right",
|
|
legend.direction = "vertical",
|
|
legend.key.size= unit(0.5, "cm"),
|
|
legend.spacing = unit(0, "cm"),
|
|
legend.title = element_text(face="italic"),
|
|
plot.margin=unit(c(10,5,5,5),"mm"),
|
|
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
|
strip.text = element_text(face="bold")
|
|
))
|
|
|
|
}
|
|
|
|
scale_fill_Publication <- function(...){
|
|
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
scale_colour_Publication <- function(...){
|
|
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
|
|
|
}
|
|
|
|
|
|
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
|
|
print(Ontology)
|
|
#all ORFs associated with GO term
|
|
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
|
|
|
#Terms is the GO term list jwr moved up to TAABLES
|
|
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
|
|
|
|
|
#BIG LOOP BIG LOOP ------------------------------------------------------
|
|
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
|
|
for(s in 1:dim(XX3)[1]){
|
|
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
|
|
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
|
|
#GO_ID_Arg <- "GO:0006325"
|
|
GO_ID_Arg_loop <- as.character(XX3[s,1])
|
|
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
|
|
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
|
|
#only make plots if parent term has fewer than 500 children
|
|
|
|
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
|
|
|
if(length(GOTerm_parent) > 100){
|
|
next()
|
|
}
|
|
|
|
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
|
if(Parent_Size < 2){
|
|
next()
|
|
}
|
|
if(Parent_Size > 2000){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
|
|
if(Parent_Size >= 1000 && Parent_Size <= 2000){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 500 && Parent_Size <= 1000){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 200 && Parent_Size <= 500){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 100 && Parent_Size <= 200){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 60 && Parent_Size <= 100){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 30 && Parent_Size <= 60){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
|
|
dev.off()
|
|
}
|
|
if(Parent_Size >= 3 && Parent_Size <= 30){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
if(Parent_Size == 2){
|
|
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
|
for(i in 1:length(GOTerm_parent)){
|
|
GO_Term <- GOTerm_parent[i]
|
|
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
|
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
|
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
|
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
|
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
|
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
|
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
|
try(heatmap.2(x=X0,
|
|
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
|
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
|
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
|
ylab = "Gene",
|
|
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
|
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
|
na.color="red", col=brewer.pal(11,"PuOr"),
|
|
main=GO_Term_Name,
|
|
#ColSideColors=ev_repeat,
|
|
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
|
}
|
|
}
|
|
dev.off()
|
|
}
|
|
}
|