First attempt at script-run-workflow

This commit is contained in:
2024-07-21 23:50:24 -04:00
parent 628356652d
commit 06dd700680
290 changed files with 5524411 additions and 0 deletions

View File

@@ -0,0 +1,765 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library(gplots)
Wstudy=getwd()
inputFile= 'asdf'
inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
lstExpNum= 'asdf'
a=0
#R can't index data.frame's so some stupid nasty code must be implemented
if (file.exists(inputFile[1])){
X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 1
Name1 <- labels[1,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[2])){
X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 2
Name2 <- labels[2,2]}
if (file.exists(inputFile[3])){
X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 3
Name3 <- labels[3,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[4])){
X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 4
Name4 <- labels[4,2]}
if (file.exists(inputFile[5])){
X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 5
Name5 <- labels[5,2]}
#--------------------------Standard Names-------------------------------------------------------
#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
#Name1= ExpNames[1]
#Name2= ExpNames[2]
#Name3= ExpNames[3]
#Name4= ExpNames[4]
#Name5= ExpNames[5]
print('52')
#----------------------TABLES------------------------------------------------------------
#import standard tables used in Sean's code That should be copied to each ExpStudy
ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo") # Args[5]
GOtermstab_file <- "go_terms.tab"
GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv" #All_SGD_GOTerms.csv #Args[7]
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#----------------------------------------------------------------------------
#The directory to put the results into. Sean puts this in the 'Home' directory
subDir <- "../TermSpecificHeatmaps"
if (file.exists(subDir)){
outputpath <- subDir
} else {
dir.create(file.path(subDir))
}
outputpath <- "../TermSpecificHeatmaps/" #paste0(Wstudy,"/TermSpecificHeatmaps/")
print(outputpath)
print('73')
#-----------------------------------------------------------------------------------
XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
print('80')
#rm(X,)
#Exp1--------------------------------------------------
if (file.exists(inputFile[1])){
print('84')
X1$ORF <- X1$OrfRep
X1$ORF <- gsub("_1","",x=X1$ORF)
X1$ORF <- gsub("_2","",x=X1$ORF)
X1$ORF <- gsub("_3","",x=X1$ORF)
X1$ORF <- gsub("_4","",x=X1$ORF)
X1$Score_L <- "No Effect"
try(X1[is.na(X1$Z_lm_L),]$Score_L <- "No Growth")
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X1$Score_K <- "No Effect"
try(X1[is.na(X1$Z_lm_K),]$Score_K <- "No Growth")
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X1 for K and L
X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001
X1[is.na(X1$Z_lm_K),]$Z_lm_K <- 0.001
X1$Rank_L <- rank(X1$Z_lm_L)
X1$Rank_K <- rank(X1$Z_lm_K)
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
print('110')
}
print('112')
#Exp2------------------------------------------------------------------------
if (file.exists(inputFile[2])){
X2$ORF <- X2$OrfRep
X2$ORF <- gsub("_1","",x=X2$ORF)
X2$ORF <- gsub("_2","",x=X2$ORF)
X2$ORF <- gsub("_3","",x=X2$ORF)
X2$ORF <- gsub("_4","",x=X2$ORF)
X2$Score_L <- "No Effect"
try(X2[is.na(X2$Z_lm_L),]$Score_L <- "No Growth")
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X2$Score_K <- "No Effect"
try(X2[is.na(X2$Z_lm_K),]$Score_K <- "No Growth")
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X2
X2[is.na(X2$Z_lm_L),]$Z_lm_L <- 0.001
X2[is.na(X2$Z_lm_K),]$Z_lm_K <- 0.001
X2$Rank_L <- rank(X2$Z_lm_L)
X2$Rank_K <- rank(X2$Z_lm_K)
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
X <- cbind(X1,X2)
print('file2-140')
}
print('142')
#Exp3--------------------------------------------------------------
if (file.exists(inputFile[3])){
X3$ORF <- X3$OrfRep
X3$ORF <- gsub("_1","",x=X3$ORF)
X3$ORF <- gsub("_2","",x=X3$ORF)
X3$ORF <- gsub("_3","",x=X3$ORF)
X3$ORF <- gsub("_4","",x=X3$ORF)
X3$Score_L <- "No Effect"
try(X3[is.na(X3$Z_lm_L),]$Score_L <- "No Growth")
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X3$Score_K <- "No Effect"
try(X3[is.na(X3$Z_lm_K),]$Score_K <- "No Growth")
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X3
X3[is.na(X3$Z_lm_L),]$Z_lm_L <- 0.001
X3[is.na(X3$Z_lm_K),]$Z_lm_K <- 0.001
X3$Rank_L <- rank(X3$Z_lm_L)
X3$Rank_K <- rank(X3$Z_lm_K)
X3 <- X3[order(X3$OrfRep,decreasing = FALSE),]
colnames(X3) <- paste(colnames(X3),"_X3",sep="")
X <- cbind(X,X3)
}
#Exp4
if (file.exists(inputFile[4])){
X4$ORF <- X4$OrfRep
X4$ORF <- gsub("_1","",x=X4$ORF)
X4$ORF <- gsub("_2","",x=X4$ORF)
X4$ORF <- gsub("_3","",x=X4$ORF)
X4$ORF <- gsub("_4","",x=X4$ORF)
X4$Score_L <- "No Effect"
try(X4[is.na(X4$Z_lm_L),]$Score_L <- "No Growth")
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X4$Score_K <- "No Effect"
try(X4[is.na(X4$Z_lm_K),]$Score_K <- "No Growth")
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X4
X4[is.na(X4$Z_lm_L),]$Z_lm_L <- 0.001
X4[is.na(X4$Z_lm_K),]$Z_lm_K <- 0.001
X4$Rank_L <- rank(X4$Z_lm_L)
X4$Rank_K <- rank(X4$Z_lm_K)
X4 <- X4[order(X4$OrfRep,decreasing = FALSE),]
colnames(X4) <- paste(colnames(X4),"_X4",sep="")
X <- cbind(X,X4)
}
#Exp5--------------------------------------------------------------
if (file.exists(inputFile[5])){
X5$ORF <- X5$OrfRep
X5$ORF <- gsub("_1","",x=X5$ORF)
X5$ORF <- gsub("_2","",x=X5$ORF)
X5$ORF <- gsub("_3","",x=X5$ORF)
X5$ORF <- gsub("_4","",x=X5$ORF)
X5$Score_L <- "No Effect"
try(X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth")
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X5$Score_K <- "No Effect"
try(X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth")
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X5
X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
X5$Rank_L <- rank(X5$Z_lm_L)
X5$Rank_K <- rank(X5$Z_lm_K)
X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
X <- cbind(X,X5)
}
print('after5-229')
X$ORF <- X$OrfRep_X1
part1=1
if (file.exists(inputFile[1])& file.exists(inputFile[2])){
print('234 ifFile2')
X$ORF <- gsub("_1","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ]
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[3])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ]
#Reorder columns
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[4])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ]
#Reorder columns
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[5])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
X$ORF <- gsub("_4","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
#Reorder columns
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
#-----------------------------------------------------------------------------------------
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#www.geneontology.org/ontology/gene_ontology_edit.obo file
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
print(Ontology)
#all ORFs associated with GO term
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
print('429')
#Gene_Association is the gene association to GO term file
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
#Terms is the GO term list jwr moved up to TAABLES
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
#BIG LOOP BIG LOOP ------------------------------------------------------
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
for(s in 1:dim(XX3)[1]){
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
#GO_ID_Arg <- "GO:0006325"
GO_ID_Arg_loop <- as.character(XX3[s,1])
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if parent term has fewer than 500 children
print('for(s in 1:dim(XX3)[1]){ -454')
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
print('pdf2000 469')
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 3 && Parent_Size <= 30){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size == 2){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
print('740')
}
dev.off()
}
}