Commit earlier refactoring

This commit is contained in:
2024-07-29 11:44:45 -04:00
parent 29cbce0754
commit 527068e683
294 changed files with 5524008 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,249 @@
# JoinInteractExps3dev.R
library(plyr)
library(sos)
library(dplyr)
# Positional command-line arguments:
#   args[1]   output directory       (default "./", legacy workflow)
#   args[2]   Z-score SD threshold   (default 2)
#   args[3]   StudyInfo CSV path     (default "../Code/StudyInfo.csv")
#   args[4+]  study directories -- presumably one per experiment, each holding
#             zscores/zscores_interaction.csv; TODO confirm against the caller
args <- commandArgs(TRUE)

# Set output dir
if (length(args) >= 1) {
  outDir <- args[1]
} else {
  outDir <- "./" # for legacy workflow
}

# Set sd value. The rest of the script reads the threshold from `std`, and it
# must be numeric so that abs(z) >= std is a numeric comparison, not a string one.
if (length(args) >= 2) {
  std <- as.numeric(args[2])
} else {
  std <- 2 # default value
}
sd <- std # keep the original variable name available as well

# Set studyInfo file
if (length(args) >= 3) {
  studyInfo <- args[3]
} else {
  studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
}

# Everything after the StudyInfo path is a study; guard against the descending
# sequence that 4:length(args) would produce when no studies are supplied.
if (length(args) >= 4) {
  studies <- args[4:length(args)]
} else {
  studies <- character(0)
}
# # Set SGDgeneList file path
# if (length(args) > 3) {
# SGDgeneList <- args[4]
# } else {
# SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow
# }
# if (length(args) > 4) {
# outDir <- args[2]
# } else {
# outDir <- "/ZScores/" # for legacy workflow
# }
#The input files should be entered in order from the greatest number of rows(Orfs) to least.
#Args <- commandArgs(TRUE)
#if(length(Args)==0){
# std=0
#}else{
# std=Args[1]
#}
print(paste("SD=", std))

# Locate the interaction Z-score table inside every study directory and keep
# only the ones that exist, so inputFiles has no NA gaps and its length is the
# number of usable studies.
zsCandidates <- file.path(studies, "zscores", "zscores_interaction.csv")
zsFound <- file.exists(zsCandidates)
if (any(!zsFound)) {
  message("Skipping missing file(s): ", paste(zsCandidates[!zsFound], collapse = ", "))
}
inputFiles <- zsCandidates[zsFound]
print(length(inputFiles)) # display the number of usable input files on the terminal
# Read in the files for your experiment and
# join them two at a time, depending on how many input files there are. List the larger file first; in this example X2 has the larger number of genes.
# If X1 has a larger number of genes, switch the order of X1 and X2
# Read every study's interaction table, join them on OrfRep in the order given,
# and build the column selections used downstream. This replaces the separate
# hard-coded branches for 2, 3 and 4 inputs with one path that works for any
# number of inputs >= 2.
nInputs <- length(inputFiles)
if (nInputs < 2) {
  stop("Need at least two zscores_interaction.csv files to join, found ", nInputs,
       call. = FALSE)
}

studyTables <- lapply(inputFiles, function(path) {
  read.csv(file = path, stringsAsFactors = FALSE)
})

# Fold the tables together left to right, exactly like the chained
# join(X1, X2), join(X, X3), ... calls did.
X <- Reduce(function(joined, nextTable) join(joined, nextTable, by = "OrfRep"),
            studyTables)

OBH <- X[, order(colnames(X))] # OrderByHeader

headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
                  contains("Z_lm_K"), contains("Z_Shift_K"),
                  contains("Z_lm_L"), contains("Z_Shift_L"))
# Frame for interleaving Z_lm with Shift columns
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))

# Each joined table after the first contributes a repeated Gene column, which
# the original branches removed as Gene.1, Gene.2, ...; drop one per extra input.
dupGeneCols <- paste0("Gene.", seq_len(nInputs - 1))
headSel <- headSel[, !(colnames(headSel) %in% dupGeneCols), drop = FALSE]
headSel2 <- headSel2[, !(colnames(headSel2) %in% dupGeneCols), drop = FALSE]
#headSel$contains('Z_Shift') %>% replace_na(0.001)
headers <- colnames(headSel)

# Replace missing values with a small placeholder chosen by column-name
# pattern: 0.001 for columns containing "Shift", 0.0001 for "Z_lm_" columns.
naPlaceholder <- c("Shift" = 0.001, "Z_lm_" = 0.0001)
for (colName in headers) {
  for (pattern in names(naPlaceholder)) {
    if (grepl(pattern, colName)) {
      isMissing <- is.na(headSel[[colName]])
      headSel[[colName]][isMissing] <- naPlaceholder[[pattern]]
    }
  }
}

# 2SD option: the Z_lm table is what gets filtered by the SD threshold later;
# the Shift table carries the matching Z_Shift columns.
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_"))
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
# Study names are taken from the second column of the StudyInfo table. Read the
# configured file (its default is the legacy "../Code/StudyInfo.csv") rather
# than a hard-coded path.
Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE, sep = ",")

# Prefix every data header (position 3 onwards) with the name of the study it
# came from and turn dots into underscores. A header without a numeric suffix
# belongs to the first study; a header ending in ".k" belongs to study k + 1.
# Deriving the study from the suffix replaces the old position checks
# (i == 3, 5, 7), which left column 6 unlabelled when three studies were
# joined, and the grepl(..., fixed=true) calls, which failed because `true` is
# not an R object.
relabelHeaders <- function(hdr, studyNames) {
  relabelled <- hdr
  dataCols <- which(seq_along(hdr) > 2)
  if (length(dataCols) == 0) {
    return(relabelled)
  }
  dataHdr <- hdr[dataCols]
  studyRow <- rep(1L, length(dataHdr))
  suffixed <- grepl("[.][0-9]+$", dataHdr)
  studyRow[suffixed] <-
    as.integer(sub("^.*[.]([0-9]+)$", "\\1", dataHdr[suffixed])) + 1L
  relabelled[dataCols] <-
    gsub("[.]", "_", paste0(studyNames[studyRow], ".", dataHdr))
  relabelled
}

REMcRdyHdr <- colnames(REMcRdy)
shftHdr <- colnames(shiftOnly)
REMcRdyLabels <- relabelHeaders(REMcRdyHdr, Labels[, 2])
shiftLabels <- relabelHeaders(shftHdr, Labels[, 2])

colnames(shiftOnly) <- shiftLabels
colnames(REMcRdy) <- REMcRdyLabels
# Start from the ORF / gene-name template and append, for each data column
# position, the Shift column followed by the matching Z_lm column, giving an
# interleaved table.
combI <- headSel2
# headersRemc<-colnames(REMcRdy)
for (colIdx in 3:ncol(REMcRdy)) {
  combI <- cbind.data.frame(combI, shiftOnly[colIdx], REMcRdy[colIdx])
}
# SD filter: keep a row when at least one Z_lm column (position 3 onwards) has
# an absolute value >= the threshold. This works for any number of data columns
# instead of only the 6-, 8- and 10-column layouts, and coerces the threshold
# to numeric so a value that arrived as a string is not compared as text.
stdThreshold <- as.numeric(std)

zCols <- seq_len(ncol(REMcRdy))[-(1:2)]
exceedsByCol <- lapply(REMcRdy[zCols], function(z) abs(z) >= stdThreshold)
bolVec <- Reduce(`|`, exceedsByCol, rep(FALSE, nrow(REMcRdy)))

# Row-filtered copies; shiftOnly is filtered with the mask computed from the
# Z_lm table so both outputs keep the same rows.
REMcRdyGT2 <- REMcRdy[bolVec, , drop = FALSE]
shiftOnlyGT2 <- shiftOnly[bolVec, , drop = FALSE]

# A threshold of 0 means "no filtering": the unfiltered tables are kept.
if (stdThreshold != 0) {
  REMcRdy <- REMcRdyGT2
  shiftOnly <- shiftOnlyGT2
}
# R places hidden "" around the header names. The following
# is intended to remove those quote so that the "" do not blow up the Java REMc.
# Use ,quote=F in the write.csv statement to fix R output file.
#write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE)
# Write the Z_lm and Shift tables without quoting (see the note above about the
# Java REMc step).
remcOutputs <- list("REMcRdy_lm_only.csv" = REMcRdy, "Shift_only.csv" = shiftOnly)
for (outName in names(remcOutputs)) {
  write.csv(remcOutputs[[outName]], file = file.path(outDir, outName),
            row.names = FALSE, quote = FALSE)
}

#LabelStd <- read.table(file= "./parameters.csv",stringsAsFactors = FALSE,sep= ",")
# Record the SD threshold in column 4 of the StudyInfo table, then save that
# table both as parameters.csv in the output directory and back over the
# StudyInfo file itself.
LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE)
print(std)
LabelStd[, 4] <- as.numeric(std)
for (paramPath in c(file.path(outDir, "parameters.csv"), studyInfo)) {
  write.csv(LabelStd, file = paramPath, row.names = FALSE)
}

View File

@@ -0,0 +1,296 @@
timestamp()
# version 10 - edited the way clusters are found - problem with clusters 1-0-1 having genes in it from 1-0-11
Args <- commandArgs(TRUE)

# Arg 1: the "finalTable.csv" produced by the REMc run.
input_finalTable <- Args[1]

# Arg 2: the folder the heatmap PDFs are written to; it is created when it
# does not exist yet.
subDir <- Args[2]
if (!file.exists(subDir)) {
  dir.create(file.path(subDir))
}
outputpath <- Args[2]

library(RColorBrewer)
library(gplots)

hmapfile <- data.frame(read.csv(file = input_finalTable, header = TRUE, sep = ",",
                                stringsAsFactors = FALSE))

# Cells holding one of these placeholder values are turned into NA.
for (placeholder in c(-100, 100, 0.001, -0.001)) {
  hmapfile[hmapfile == placeholder] <- NA
}
#select the number of rows based on the number of genes
# Number of rows (genes) in the final table.
num_total_genes <- length(hmapfile[,1])
#break out the cluster names so each part of the cluster origin can be accessed
#line below removed because it adds too many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
#hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
# Put a space in front of every ';' and then split on ';', so cluster.origin
# becomes a list column holding one character vector of cluster names per row.
# Presumably the inserted trailing space is what keeps a later grepl on "1-0-1 "
# from also matching "1-0-10 " -- see the note above.
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
#use tail(x,n) for accessing the outward most cluster
# clust_rounds ends up as the largest number of cluster names found on any row.
clust_rounds <- 0
for(i in 1:num_total_genes){
if(length(hmapfile$cluster.origin[[i]]) > clust_rounds){
clust_rounds <- length(hmapfile$cluster.origin[[i]])
}
}
# Distinct per-row cluster-name vectors (still a list at this point).
unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
# Intended to drop entries that are just a single space.
# NOTE(review): unique_clusts is a list here, so this compares list elements
# with " " -- confirm it behaves as intended for rows with several cluster names.
unique_clusts <- unique_clusts[unique_clusts != " "]
#select only the unique cluster names
# Flatten the list, de-duplicate, and sort the individual cluster names ascending.
unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
num_unique_clusts <- length(unique_clusts)
#base the color key on a statistical analysis of the L and K data
#need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
#scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
#hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
#change so that the L data is multiplied to be on the same scale as the K data
KEY_MIN <- 0
KEY_MAX <- 0
K_MIN <- 0
L_MAX <- 0

# Collect the positions of the "_Z_lm_K" and "_Z_lm_L" columns among the data
# columns (4th column up to the third-from-last).
KcolumnValues <- vector()
LcolumnValues <- vector()
for (colIdx in 4:(length(hmapfile[1, ]) - 2)) {
  colName <- colnames(hmapfile)[colIdx]
  if (grepl("_Z_lm_K", colName, fixed = TRUE)) {
    KcolumnValues <- c(KcolumnValues, colIdx)
  }
  if (grepl("_Z_lm_L", colName, fixed = TRUE)) {
    LcolumnValues <- c(LcolumnValues, colIdx)
  }
}

# Earlier quantile-based limits, kept for reference:
#L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
#K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
#L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
#K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]

# The colour key now uses fixed limits of -12 and 12.
L_MAX <- 12
K_MIN <- -12

# Earlier rescaling of the L data onto the K scale, kept for reference:
#L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
#hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
#if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
# print("FOUND SHIFT VALUES")
# hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
#}
#KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
#KEY_MIN <- as.numeric(K_MIN)

KEY_MAX <- as.numeric(L_MAX)
KEY_MIN <- as.numeric(K_MIN)
print(KEY_MIN)
print(L_MAX)
#print(L_Multiplier)

# Twelve break points (for the 11 colours): KEY_MIN scaled by 6/6 ... 1/6,
# then KEY_MAX scaled by 1/6 ... 6/6.
colormapbreaks <- c(KEY_MIN * ((6:1) / 6), KEY_MAX * ((1:6) / 6))
#print(colormapbreaks)
#print(colormapbreaks)
#probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
#however since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
#then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
#could we pool all the shift data/average it?
# Column separators for the heatmap: every second position when the first data
# column (column 4) is a Shift column, every position otherwise.
firstDataIsShift <- grepl("Shift", colnames(hmapfile)[4], fixed = TRUE)
if (firstDataIsShift) {
  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 2)
  #ev_repeat = rep("white",length(even_columns))
  #ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
  #middle_col <- (length(hmapfile[1,]) - 5)/2
  #ev_repeat[(middle_col/2)] <- "black"
  #print(ev_repeat)
} else {
  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 1)
  print("NO SHIFT VALS FOUND")
}

#FOR THIS SCRIPT ONLY (rap tem hu script)
#even_columns <- c(2,5,7,10,12,15,17)
#m <- 0

# Column labels for the heatmap: a Shift column gets an empty label, and the
# column right after it has "_Z_lm_" and then any remaining underscores replaced
# by spaces. (An earlier strsplit-based relabelling on "Z_lm" was commented out
# here.)
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 2)])
#print(colnames_edit)
for (pos in seq_along(colnames_edit)) {
  if (grepl("Shift", colnames_edit[pos], fixed = TRUE)) {
    following <- pos + 1
    colnames_edit[pos] <- ""
    spaced <- gsub(pattern = "_Z_lm_", replacement = " ", x = colnames_edit[following])
    colnames_edit[following] <- gsub(pattern = "_", replacement = " ", x = spaced)
  }
}
print(colnames_edit)
#break()
#colnames_edit[5] <- "TEM HLEG K"
#colnames_edit[10] <- "TEM HL K"
#colnames_edit[15] <- "TEM HLEG L"
#colnames_edit[20] <- "TEM HL L"
#create the heatmaps
# Page size and text scaling for a cluster heatmap, chosen by the number of
# genes in the cluster. notecex = NULL means "draw no cell notes" (used for the
# largest clusters).
heatmap_layout <- function(n_genes) {
  if (n_genes >= 2001) {
    list(height = 20, width = 15, cexRow = 0.1, notecex = NULL, keysize = 0.7)
  } else if (n_genes >= 201) {
    list(height = 15, width = 12, cexRow = 0.1, notecex = 0.1, keysize = 0.7)
  } else if (n_genes >= 150) {
    list(height = 12, width = 12, cexRow = 0.1, notecex = 0.2, keysize = 1)
  } else if (n_genes >= 101) {
    list(height = 12, width = 12, cexRow = 0.2, notecex = 0.3, keysize = 1)
  } else if (n_genes >= 60) {
    list(height = 12, width = 12, cexRow = 0.4, notecex = 0.3, keysize = 1)
  } else if (n_genes >= 30) {
    list(height = 9, width = 12, cexRow = 0.6, notecex = 0.4, keysize = 1)
  } else {
    list(height = 7, width = 12, cexRow = 0.9, notecex = 0.4, keysize = 1)
  }
}

# Draw one cluster heatmap into a PDF at `path`. Reads colormapbreaks,
# even_columns and colnames_edit from the script environment. The device is
# closed even when heatmap.2 fails, so a failed cluster does not leave a PDF
# device open.
render_cluster_heatmap <- function(path, layout, values, row_labels, title) {
  pdf(file = path, height = layout$height, width = layout$width)
  on.exit(dev.off(), add = TRUE)
  # Extra arguments (the cell-note settings) are forwarded only when supplied.
  draw <- function(...) {
    heatmap.2(x = values,
              Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
              dendrogram = "row", cexCol = 0.8, cexRow = layout$cexRow,
              scale = "none",
              breaks = colormapbreaks, symbreaks = FALSE,
              colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
              xlab = "Type of Media", ylab = "Gene Name",
              keysize = layout$keysize, trace = "none",
              density.info = c("none"), margins = c(10, 8),
              na.color = "red", col = brewer.pal(11, "PuOr"),
              main = title,
              labRow = row_labels, labCol = colnames_edit,
              ...)
  }
  if (is.null(layout$notecex)) {
    draw()
  } else {
    draw(cellnote = round(values, digits = 0), notecex = layout$notecex, key = TRUE)
  }
  invisible(path)
}

# Create one heatmap PDF per cluster name; clusters with exactly one gene are
# skipped. All size classes now open the PDF the same way -- the original
# 101-149 gene branch passed the path to pdf() a second time as a stray
# positional argument.
for (i in seq_len(num_unique_clusts)) {
  cluster <- unique_clusts[i]
  cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
  cluster_length <- length(cluster_data[, 1])
  if (cluster_length != 1) {
    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 2)])
    mypath <- file.path(outputpath,
                        paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
    render_cluster_heatmap(path = mypath,
                           layout = heatmap_layout(cluster_length),
                           values = X0,
                           row_labels = as.character(cluster_data$Gene),
                           title = cluster)
  }
  #print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
}
timestamp()

View File

@@ -0,0 +1,39 @@
# This version of Exclude_DAmPs was modified by Remy. It
# assumes that underscores included in OrfRep names are
# already in the DAmPList, because in some cases an OrfRep
# may have _1 being a non-DAmP and _2 being a DAmP. It is
# not a general rule so it is better just to use a list
# generated from the Master Plate sheet directly rather
# than trying to deal with underscores in this script.
#build in command args to apply this code to a given !!results sheet
Args <- commandArgs(TRUE)
# Arg 1 is the input file (ZScores_Interaction.csv, REMcReady.csv, other files
# with an OrfRep column) for your genome wide YKO/YKD experiment
input_file1 <- Args[1]
# Arg 2 is the DAmPs list, so these genes can be removed
DAmPs_List <- Args[2]
# Arg 3 is the output file
output_file <- Args[3]

X <- read.csv(file = input_file1, stringsAsFactors = FALSE)
Damps <- read.delim(DAmPs_List, header = FALSE)

if (!("OrfRep" %in% colnames(X))) {
  stop("Input file has no OrfRep column: ", input_file1, call. = FALSE)
}

# Sean: an earlier version stripped _1.._4 from a copy of OrfRep first.
# Remy: that is unnecessary here because the DAmP list already carries the
# underscore suffixes.
#
# Filter on OrfRep directly. The previous approach added a temporary ORF column
# and then dropped "the last column", which overwrote any existing ORF column
# in the input, could drop the wrong column, and collapsed a one-column table
# to a bare vector.
X <- X[!(X$OrfRep %in% Damps$V1), , drop = FALSE]

write.csv(X, file = output_file, row.names = FALSE)

View File

@@ -0,0 +1,374 @@
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
#Arg1 is Average_GOTerms_All_1.csv
#Arg2 is the name to give GTF results 1
#Arg3 is Average_GOTerms_All2.csv
#Arg4 is the name to give GTF results 2
#Arg5 is the directory to put the files into
#build in command args to apply this code to a given !!results sheet
Args <- commandArgs(TRUE)

# Arg 1: GTF results 1; Arg 2: the name printed for GTF results 1
input_file1 <- Args[1]
Name1 <- Args[2]

# Arg 3: GTF results 2; Arg 4: the name printed for GTF results 2
input_file2 <- Args[3]
Name2 <- Args[4]

# Arg 5: the directory the results go into; it is created when it does not
# exist yet, and used as the output path either way.
subDir <- Args[5]
if (!file.exists(subDir)) {
  dir.create(file.path(subDir))
}
outputpath <- Args[5]
#theme elements for plots
# Publication-style ggplot theme with a compact horizontal legend at the bottom.
#   base_size    base font size passed to theme_foundation()
#   base_family  base font family passed to theme_foundation()
# Returns theme_foundation() with the overrides below added.
theme_Publication <- function(base_size=14, base_family="sans") {
  no_outline <- element_rect(colour = NA)
  pale_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette; extra
# arguments are forwarded to discrete_scale(). Attaches the scales package on
# each call.
# NOTE: a function with this name is defined again further down in this file;
# the later definition is the one in effect once the whole script has run.
scale_fill_Publication <- function(...){
  library(scales)
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  publication_pal <- manual_pal(values = publication_colours)
  discrete_scale("fill", "Publication", publication_pal, ...)
}
# Discrete colour scale using the nine-colour "Publication" palette; extra
# arguments are forwarded to discrete_scale().
# manual_pal is namespace-qualified so the function does not depend on the
# scales package having been attached by some earlier call.
# NOTE: a function with this name is defined again further down in this file;
# the later definition is the one in effect once the whole script has run.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
# Publication-style ggplot theme with a vertical legend on the right.
#   base_size    base font size passed to theme_foundation()
#   base_family  base font family passed to theme_foundation()
# Returns theme_foundation() with the overrides below added.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  no_outline <- element_rect(colour = NA)
  pale_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette; extra
# arguments are forwarded to discrete_scale().
# NOTE: this redefines the scale_fill_Publication declared earlier in the file,
# and unlike that one it never attached scales itself; manual_pal is therefore
# namespace-qualified so the call works whether or not scales is attached.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}
# Discrete colour scale using the nine-colour "Publication" palette; extra
# arguments are forwarded to discrete_scale().
# NOTE: this redefines the scale_colour_Publication declared earlier in the
# file. manual_pal is namespace-qualified so the call works whether or not
# scales is attached.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
#Name1 <- "DOXO_HLD"
#Name2 <- "DOXO_HLEG"

# Resolve the output directory to an absolute path once, and build file names
# with file.path(). Pasting getwd(), "/" and outputpath together only worked for
# a relative outputpath that ended in "/".
out_dir <- normalizePath(outputpath, mustWork = FALSE)
ontology_base <- paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology")
plotly_path <- file.path(out_dir, paste0(ontology_base, ".html"))

# Full outer merge of the two GTF result tables on the GO term; shared column
# names get the _X1 / _X2 suffixes.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Scatter of the average Z_lm_K per GO term, results 1 on x and results 2 on y,
# coloured by ontology, with a box marking the -2..2 region. The extra
# aesthetics (Term, Genes, ...) are carried along for the interactive plot.
gg <- ggplot(data = X, aes(x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Ontology_Avg_X1,
                           Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
                           AllPossibleGenes = AllPossibleGenes_Avg_X1,
                           SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2)) +
  xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25,
            alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) +
  ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) +
  theme_Publication_legend_right()

pdf(file.path(out_dir, paste0(ontology_base, ".pdf")), width = 12, height = 8)
# Print explicitly: a bare `gg` is only drawn when top-level autoprint is on,
# which is not the case when the script is run through source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
#pgg
saveWidget(pgg, plotly_path, selfcontained = TRUE)
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
# Classify GO terms by where their average Z_lm_K falls relative to +/-2 in
# each result set.
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ]

# Label every term of X with its category. The assignments run in the same
# order as before, so a term in several categories keeps the label of the last
# matching one. Assigning into the column vector is a no-op when a category is
# empty, so the former try() wrappers -- which only hid the zero-row
# replacement error -- are no longer needed.
X$Overlap_Avg <- NA
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <-
  paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <-
  paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <-
  paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <-
  paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <-
  "Overlapping_Deletion_Suppressors"
X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <-
  "Overlapping_Deletion_Enhancers"
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <-
  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <-
  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
# Build the overlap-coloured scatter for one data set: average Z_lm_K per GO
# term, results 1 on x and results 2 on y, coloured by Overlap_Avg, with a box
# marking the -2..2 region. The extra aesthetics are carried along for the
# interactive plot.
overlap_scatter <- function(data) {
  ggplot(data = data, aes(x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg,
                          Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
                          AllPossibleGenes = AllPossibleGenes_Avg_X1,
                          SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2)) +
    xlab(paste("GO Term Avg lm Z for ", Name1, sep = "")) +
    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25,
              alpha = 0.1, inherit.aes = FALSE, fill = NA) +
    geom_point(shape = 3) +
    ylab(paste("GO Term Avg lm Z for ", Name2, sep = "")) +
    ggtitle(paste("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2, sep = "")) +
    theme_Publication_legend_right()
}

# Save a plot as a PDF and as a self-contained interactive HTML widget in the
# output directory. The directory is resolved to an absolute path and joined
# with file.path(); pasting getwd(), "/" and outputpath together only worked for
# a relative outputpath that ended in "/". The plot is printed explicitly
# because a bare top-level `gg` is not drawn when the script is source()'d.
# Returns the widget and the HTML path.
save_scatter <- function(plot, pdf_name, html_name) {
  out_dir <- normalizePath(outputpath, mustWork = FALSE)
  pdf(file.path(out_dir, pdf_name), width = 12, height = 8)
  print(plot)
  dev.off()
  widget <- ggplotly(plot)
  html_path <- file.path(out_dir, html_name)
  saveWidget(widget, html_path, selfcontained = TRUE)
  invisible(list(widget = widget, html_path = html_path))
}

scatter_prefix <- paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2)

# 1) All terms, coloured by overlap category.
gg <- overlap_scatter(X)
saved <- save_scatter(gg,
                      pdf_name = paste0(scatter_prefix, "_All_ByOverlap.pdf"),
                      html_name = paste0(scatter_prefix, "_All_byOverlap.html"))
pgg <- saved$widget
plotly_path <- saved$html_path

# 2) Only terms with at least two genes in both result sets.
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
gg <- overlap_scatter(x_rem2_gene)
saved <- save_scatter(gg,
                      pdf_name = paste0(scatter_prefix, "_All_ByOverlap_above2genes.pdf"),
                      html_name = paste0(scatter_prefix, "_All_byOverlap_above2genes.html"))
pgg <- saved$widget
plotly_path <- saved$html_path

# 3) Only terms that received an overlap category.
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]
gg <- overlap_scatter(X_overlap_nothresold)
saved <- save_scatter(gg,
                      pdf_name = paste0(scatter_prefix, "_Above2SD_ByOverlap.pdf"),
                      html_name = paste0(scatter_prefix, "_Above2SD_byOverlap.html"))
pgg <- saved$widget
plotly_path <- saved$html_path
#only output GTF terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
saveWidget(pgg, plotly_path,
selfcontained =TRUE)
plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
saveWidget(pgg, plotly_path,
selfcontained =TRUE)
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
saveWidget(pgg, plotly_path,
selfcontained =TRUE)
plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
saveWidget(pgg, plotly_path,
selfcontained =TRUE)
write.csv(x=X,file = paste(outputpath,"All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(outputpath,"AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)

View File

@@ -0,0 +1,374 @@
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
#Arg1 is Average_GOTerms_All_1.csv
#Arg2 is the name to give GTF results 1
#Arg3 is Average_GOTerms_All2.csv
#Arg4 is the name to give GTF results 2
#Arg5 is the directory to put the files into
#build in command args to apply this code to a given !!results sheet
# Parse the five positional command-line arguments:
#   1. CSV with GTF results 1
#   2. name of GTF results 1 to print in the results
#   3. CSV with GTF results 2
#   4. name of GTF results 2 to print in the results
#   5. directory to put the results into (created if needed)
Args <- commandArgs(TRUE)
if (length(Args) < 5) {
  stop("Expected 5 arguments: <results1.csv> <name1> <results2.csv> <name2> <output dir>",
       call. = FALSE)
}
input_file1 <- Args[1]
Name1 <- Args[2]
input_file2 <- Args[3]
Name2 <- Args[4]
subDir <- Args[5]
# Create the output directory, including any missing parent directories.
if (!file.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
# Output file names are pasted directly onto `outputpath`, so make sure it
# ends with a path separator; otherwise files land next to the directory.
outputpath <- subDir
if (nzchar(outputpath) && !grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
#theme elements for plots
# Publication-style ggplot2 theme layered on theme_foundation(), with the
# legend laid out horizontally below the plot.
#   base_size, base_family: passed through unchanged to theme_foundation().
# Returns the combined theme object.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    # Title and general text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # Backgrounds and borders without outlines
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # Axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # Light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # Legend
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # Margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified so the function works whether or not
  # scales is attached, and no longer attaches it as a side effect.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Variant of theme_Publication() with a vertical legend to the right of the
# plot and larger legend keys; every other theme element is the same.
#   base_size, base_family: passed through unchanged.
# Returns the combined theme object.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  legend_overrides <- theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm")
  )
  # Elements in the later theme() replace the corresponding ones set by
  # theme_Publication().
  theme_Publication(base_size = base_size, base_family = base_family) +
    legend_overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale(). This definition replaces the
# identically named one earlier in the script.
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified so the function works whether or not
  # the scales package is attached.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# Load the two GTF result sheets named on the command line.
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)

# Full outer join on the GO term: terms present in only one sheet are kept
# with NA for the other sheet's columns. Columns shared by both sheets get
# the _X1 / _X2 suffix.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Scatter of average lm Z (L) scores, Name1 vs Name2, coloured by ontology.
# The grey box marks the +/-2 region on both axes.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20",
            size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

# Static PDF copy. print() explicitly: a bare `gg` is only drawn by top-level
# autoprinting, which does not happen when the script is source()d.
pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
print(gg)
dev.off()

# Interactive, self-contained HTML copy.
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
# Identify aggravators and alleviators from the average lm Z (L) scores alone,
# regardless of whether they still meet the +/-2 threshold once the SD is
# taken into account. which() drops rows where either score is NA, so terms
# missing from one result set are not classified.
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]

# Label the terms. Indexing the column (rather than a row subset of X) is a
# no-op for an empty class, so no try() is needed. Later assignments override
# earlier ones, which lets the two opposite-direction classes at the end
# replace the "specific" labels set above.
X$Overlap_Avg <- NA_character_
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
# ---- Overlap scatter plots, SD-thresholded classes and CSV export (L) ----

# Write `plot` to a PDF at `pdf_file` and to a self-contained plotly HTML page
# at `html_file`. `width`/`height` are the PDF page size in inches.
export_scatter <- function(plot, pdf_file, html_file, width = 12, height = 8) {
  pdf(pdf_file, width = width, height = height)
  # print() explicitly: a bare plot object is only drawn by top-level
  # autoprinting. Close the device even if rendering fails.
  tryCatch(print(plot), finally = dev.off())
  saveWidget(ggplotly(plot), html_file, selfcontained = TRUE)
  invisible(plot)
}

# Build the Name1-vs-Name2 scatter of average GO-term lm Z (L) scores.
#   data        rows to plot
#   mapping     plot-level aes()
#   label_layer optional geom_text() layer, drawn beneath the +/-2 box and points
#   point_size  size of the "+" markers; NULL keeps the geom_point() default
build_scatter <- function(data, mapping, label_layer = NULL, point_size = NULL) {
  point_layer <- if (is.null(point_size)) {
    geom_point(shape = 3)
  } else {
    geom_point(shape = 3, size = point_size)
  }
  ggplot(data = data, mapping = mapping) +
    label_layer +
    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20",
              size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
    point_layer +
    xlab(paste0("GO Term Avg lm Z for ", Name1)) +
    ylab(paste0("GO Term Avg lm Z for ", Name2)) +
    ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
    theme_Publication_legend_right()
}

scatter_stem <- paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2)
pdf_prefix <- paste0(outputpath, scatter_stem)
html_prefix <- paste0(getwd(), "/", outputpath, scatter_stem)

# Term, Genes, NumGenes, ... are not ggplot2 aesthetics; presumably they are
# mapped so that ggplotly() can show them in the hover text -- TODO confirm.
aes_by_overlap_avg <- aes(
  x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
  Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
  AllPossibleGenes = AllPossibleGenes_Avg_X1,
  SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
)
aes_by_overlap <- aes(
  x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
  Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
  AllPossibleGenes = AllPossibleGenes_Avg_X1,
  SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
)

# All terms, coloured by the unthresholded overlap class.
export_scatter(
  build_scatter(X, aes_by_overlap_avg),
  paste0(pdf_prefix, "_All_ByOverlap.pdf"),
  paste0(html_prefix, "_All_byOverlap.html")
)

# Terms backed by at least two genes in both result sets. which() drops rows
# whose gene count is NA instead of turning them into all-NA rows.
x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2), ]
export_scatter(
  build_scatter(x_rem2_gene, aes_by_overlap_avg),
  paste0(pdf_prefix, "_All_ByOverlap_above2genes.pdf"),
  paste0(html_prefix, "_All_byOverlap_above2genes.html")
)

# Terms that received any Overlap_Avg label. Note the output files are named
# "Above2SD" although the labels used here do not take the SD into account.
X_overlap_nothresold <- X[!is.na(X$Overlap_Avg), ]
export_scatter(
  build_scatter(X_overlap_nothresold, aes_by_overlap_avg),
  paste0(pdf_prefix, "_Above2SD_ByOverlap.pdf"),
  paste0(html_prefix, "_Above2SD_byOverlap.html")
)

# Only keep GTF terms whose average score is still beyond +/-2 after moving it
# one SD towards zero.
# Z1 identifies aggravators (Avg - SD >= 2).
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
# Z2 identifies alleviators (Avg + SD <= -2); despite the column names, the
# Z2 columns hold Avg + SD.
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z2$Z_lm_L_Avg_X1 + Z2$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z2$Z_lm_L_Avg_X2 + Z2$Z_lm_L_SD_X2

# Z1 and Z2 are both row-for-row copies of X, so masks mixing their columns
# line up. which() drops rows where a score or SD is NA.
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]

# Label the terms. Indexing the column (rather than a row subset of X) is a
# no-op for an empty class, and later assignments override earlier ones.
X$Overlap <- NA_character_
X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")

# Plots restricted to the terms that passed the SD-adjusted threshold.
X_abovethreshold <- X[!is.na(X$Overlap), ]
export_scatter(
  build_scatter(X_abovethreshold, aes_by_overlap),
  paste0(pdf_prefix, "_All_ByOverlap_AboveThreshold.pdf"),
  paste0(html_prefix, "_All_ByOverlap_AboveThreshold.html")
)

# Same plot with every point labelled by its GO term (larger page for legibility).
export_scatter(
  build_scatter(
    X_abovethreshold, aes_by_overlap,
    label_layer = geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2),
    point_size = 3
  ),
  paste0(pdf_prefix, "_All_ByOverlap_AboveThreshold_names.pdf"),
  paste0(html_prefix, "_All_ByOverlap_AboveThreshold_names.html"),
  width = 20, height = 20
)

# Rank terms by descending average Z in each result set (rank 1 = highest);
# ties are broken at random, so tied ranks can differ between runs.
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random")

# Points numbered by their rank in result set 1.
export_scatter(
  build_scatter(
    X_abovethreshold, aes_by_overlap,
    label_layer = geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4),
    point_size = 3
  ),
  paste0(pdf_prefix, "_All_ByOverlap_AboveThreshold_numberedX1.pdf"),
  paste0(html_prefix, "_All_ByOverlap_AboveThreshold_numberedX1.html"),
  width = 15, height = 15
)

# Points numbered by their rank in result set 2.
export_scatter(
  build_scatter(
    X_abovethreshold, aes_by_overlap,
    label_layer = geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4),
    point_size = 3
  ),
  paste0(pdf_prefix, "_All_ByOverlap_AboveThreshold_numberedX2.pdf"),
  paste0(html_prefix, "_All_ByOverlap_AboveThreshold_numberedX2.html"),
  width = 15, height = 15
)

# Export the full merged table and the above-threshold subset (with ranks).
write.csv(x = X, file = paste0(outputpath, "All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"), row.names = FALSE)
write.csv(x = X_abovethreshold, file = paste0(outputpath, "AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"), row.names = FALSE)

View File

@@ -0,0 +1,374 @@
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
#Arg1 is Average_GOTerms_All_1.csv
#Arg2 is the name to give GTF results 1
#Arg3 is Average_GOTerms_All2.csv
#Arg4 is the name to give GTF results 2
#Arg5 is the directory to put the files into
#build in command args to apply this code to a given !!results sheet
# Parse the five positional command-line arguments:
#   1. CSV with GTF results 1
#   2. name of GTF results 1 to print in the results
#   3. CSV with GTF results 2
#   4. name of GTF results 2 to print in the results
#   5. directory to put the results into (created if needed)
Args <- commandArgs(TRUE)
if (length(Args) < 5) {
  stop("Expected 5 arguments: <results1.csv> <name1> <results2.csv> <name2> <output dir>",
       call. = FALSE)
}
input_file1 <- Args[1]
Name1 <- Args[2]
input_file2 <- Args[3]
Name2 <- Args[4]
subDir <- Args[5]
# Create the output directory, including any missing parent directories.
if (!file.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
# Output file names are pasted directly onto `outputpath`, so make sure it
# ends with a path separator; otherwise files land next to the directory.
outputpath <- subDir
if (nzchar(outputpath) && !grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
#theme elements for plots
# Publication-style ggplot2 theme layered on theme_foundation(), with the
# legend laid out horizontally below the plot.
#   base_size, base_family: passed through unchanged to theme_foundation().
# Returns the combined theme object.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    # Title and general text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # Backgrounds and borders without outlines
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # Axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # Light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # Legend
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # Margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified so the function works whether or not
  # scales is attached, and no longer attaches it as a side effect.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Variant of theme_Publication() with a vertical legend to the right of the
# plot and larger legend keys; every other theme element is the same.
#   base_size, base_family: passed through unchanged.
# Returns the combined theme object.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  legend_overrides <- theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm")
  )
  # Elements in the later theme() replace the corresponding ones set by
  # theme_Publication().
  theme_Publication(base_size = base_size, base_family = base_family) +
    legend_overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale(). This definition replaces the
# identically named one earlier in the script.
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified so the function works whether or not
  # the scales package is attached.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# Load the two GTF result sheets named on the command line.
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)

# Full outer join on the GO term: terms present in only one sheet are kept
# with NA for the other sheet's columns. Columns shared by both sheets get
# the _X1 / _X2 suffix.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Scatter of average lm Z (L) scores, Name1 vs Name2, coloured by ontology.
# The grey box marks the +/-2 region on both axes.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20",
            size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

# Static PDF copy. print() explicitly: a bare `gg` is only drawn by top-level
# autoprinting, which does not happen when the script is source()d.
pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
print(gg)
dev.off()

# Interactive, self-contained HTML copy.
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
# Identify aggravators and alleviators from the average lm Z (L) scores alone,
# regardless of whether they still meet the +/-2 threshold once the SD is
# taken into account. which() drops rows where either score is NA, so terms
# missing from one result set are not classified.
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]

# Label the terms. Indexing the column (rather than a row subset of X) is a
# no-op for an empty class, so no try() is needed. Later assignments override
# earlier ones, which lets the two opposite-direction classes at the end
# replace the "specific" labels set above.
X$Overlap_Avg <- NA_character_
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
# ---- Scatter plots coloured by the unthresholded overlap class (L) ----

# Write `plot` to a PDF at `pdf_file` and to a self-contained plotly HTML page
# at `html_file`. `width`/`height` are the PDF page size in inches.
export_scatter <- function(plot, pdf_file, html_file, width = 12, height = 8) {
  pdf(pdf_file, width = width, height = height)
  # print() explicitly: a bare plot object is only drawn by top-level
  # autoprinting. Close the device even if rendering fails.
  tryCatch(print(plot), finally = dev.off())
  saveWidget(ggplotly(plot), html_file, selfcontained = TRUE)
  invisible(plot)
}

# Build the Name1-vs-Name2 scatter of average GO-term lm Z (L) scores for
# `data`, coloured by Overlap_Avg, with a grey box marking the +/-2 region.
# Term, Genes, NumGenes, ... are not ggplot2 aesthetics; presumably they are
# mapped so that ggplotly() can show them in the hover text -- TODO confirm.
overlap_avg_scatter <- function(data) {
  ggplot(
    data = data,
    aes(
      x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
      Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
      AllPossibleGenes = AllPossibleGenes_Avg_X1,
      SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
    )
  ) +
    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20",
              size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
    geom_point(shape = 3) +
    xlab(paste0("GO Term Avg lm Z for ", Name1)) +
    ylab(paste0("GO Term Avg lm Z for ", Name2)) +
    ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
    theme_Publication_legend_right()
}

scatter_stem <- paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2)
pdf_prefix <- paste0(outputpath, scatter_stem)
html_prefix <- paste0(getwd(), "/", outputpath, scatter_stem)

# All terms.
export_scatter(
  overlap_avg_scatter(X),
  paste0(pdf_prefix, "_All_ByOverlap.pdf"),
  paste0(html_prefix, "_All_byOverlap.html")
)

# Terms backed by at least two genes in both result sets. which() drops rows
# whose gene count is NA instead of turning them into all-NA rows.
x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2), ]
export_scatter(
  overlap_avg_scatter(x_rem2_gene),
  paste0(pdf_prefix, "_All_ByOverlap_above2genes.pdf"),
  paste0(html_prefix, "_All_byOverlap_above2genes.html")
)

# Terms that received any Overlap_Avg label. Note the output files are named
# "Above2SD" although the labels used here do not take the SD into account.
# `plotly_path` and `pgg` are left defined for the saveWidget() call that
# follows this section.
X_overlap_nothresold <- X[!is.na(X$Overlap_Avg), ]
plotly_path <- paste0(html_prefix, "_Above2SD_byOverlap.html")
gg <- overlap_avg_scatter(X_overlap_nothresold)
pdf(paste0(pdf_prefix, "_Above2SD_ByOverlap.pdf"), width = 12, height = 8)
print(gg)
dev.off()
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path,
selfcontained =TRUE)
# Classify GO terms again, this time requiring the average lm Z to stay beyond
# +/-2 after moving it one SD towards zero.
# Z1 identifies aggravators: its L_Subtract_SD_* columns hold (average - SD).
# Z2 identifies alleviators: its L_Subtract_SD_* columns hold (average + SD),
# despite the "Subtract" in the column name.
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2

# Category tables; which() drops rows whose comparison is NA.
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
# Mixed cases combine a Z1 (average - SD) test with a Z2 (average + SD) test;
# Z1 and Z2 are both copies of X, so their rows line up.
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]

# Label each GO term with its category; unmatched terms keep NA. Later
# assignments overwrite earlier ones.
# The column is indexed directly because X[rows,]$col <- value raises
# "replacement has 1 row, data has 0" when no row matches, which previously
# had to be hidden with try() (hiding genuine errors as well).
# NOTE(review): "Suppresors" (single s) in the Name1 label is kept as-is
# because it is written to the exported CSV and plot legends.
X$Overlap <- NA
X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
# ---- Plots and CSV export for GO terms that passed the SD-adjusted threshold ----
# Every plot is written as a PDF and as an HTML widget sharing the same stem.
threshold_stem <- paste0("Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold")

# Keep only the terms that received an Overlap label.
X_abovethreshold <- X[!(is.na(X$Overlap)),]

# Aesthetics shared by all four plots. Only x, y and colour are drawn; the
# other mappings are presumably for ggplotly() hover text -- TODO confirm.
threshold_mapping <- aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)
# Outline of the |Z| < 2 square.
threshold_box <- geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA)
# Axis labels, title and theme shared by all four plots.
threshold_labels <- list(
  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")),
  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")),
  ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")),
  theme_Publication_legend_right()
)

# 1) Points only.
plotly_path <- paste0(getwd(),"/",outputpath,threshold_stem,".html")
gg <- ggplot(data = X_abovethreshold, mapping = threshold_mapping) +
  threshold_box + geom_point(shape=3) + threshold_labels
pdf(paste0(outputpath,threshold_stem,".pdf"),width = 12,height = 8)
# print() explicitly: a bare `gg` is only drawn by top-level auto-printing, so
# the PDF would be empty if this script is source()d or wrapped in a loop/function.
print(gg)
dev.off()
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)

# 2) Points labelled with the GO term name.
plotly_path <- paste0(getwd(),"/",outputpath,threshold_stem,"_names.html")
gg <- ggplot(data = X_abovethreshold, mapping = threshold_mapping) +
  geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
  threshold_box + geom_point(shape=3,size=3) + threshold_labels
pdf(paste0(outputpath,threshold_stem,"_names.pdf"),width = 20,height = 20)
print(gg)
dev.off()
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)

# Rank terms within each study; rank 1 is the largest average lm Z. Ties are
# broken at random, so tied ranks are not reproducible between runs.
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")

# 3) Points labelled with the study-1 rank.
plotly_path <- paste0(getwd(),"/",outputpath,threshold_stem,"_numberedX1.html")
gg <- ggplot(data = X_abovethreshold, mapping = threshold_mapping) +
  geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
  threshold_box + geom_point(shape=3,size=3) + threshold_labels
pdf(paste0(outputpath,threshold_stem,"_numberedX1.pdf"),width = 15,height = 15)
print(gg)
dev.off()
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)

# 4) Points labelled with the study-2 rank.
plotly_path <- paste0(getwd(),"/",outputpath,threshold_stem,"_numberedX2.html")
gg <- ggplot(data = X_abovethreshold, mapping = threshold_mapping) +
  geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
  threshold_box + geom_point(shape=3,size=3) + threshold_labels
pdf(paste0(outputpath,threshold_stem,"_numberedX2.pdf"),width = 15,height = 15)
print(gg)
dev.off()
pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)

# Export the full table and the above-threshold subset (the latter with ranks).
write.csv(x=X,file = paste0(outputpath,"All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv"),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste0(outputpath,"AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv"),row.names = FALSE)

View File

@@ -0,0 +1,739 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library("gplots")
# Command-line arguments (all ten are required, in this order):
#    1  CSV of interaction Z scores for study 1 (must provide the OrfRep,
#       Z_lm_L and Z_lm_K columns used below)
#    2  display name for study 1 (substituted for "X1" in heatmap column names)
#    3  CSV of interaction Z scores for study 2
#    4  display name for study 2
#    5  CSV of interaction Z scores for study 3
#    6  display name for study 3
#    7  gene ontology .obo file
#       (www.geneontology.org/ontology/gene_ontology_edit.obo)
#    8  go_terms.tab file
#    9  CSV listing the parent GO terms to plot: the first column is the
#       numeric GO id (it is zero-padded to 7 digits and prefixed with "GO:"),
#       the second column is the term name used for the output PDF file name.
#       Examples of ids: biological process 8150, molecular function 3674,
#       cellular component 5575, chromatin organization 6325.
#   10  output directory, created if it does not exist. Output file names are
#       pasted directly onto this string, so it presumably has to end with a
#       path separator -- TODO confirm with callers.
Args <- commandArgs(TRUE)
if (length(Args) < 10) {
  stop("Expected 10 arguments: <scores1.csv> <name1> <scores2.csv> <name2> ",
       "<scores3.csv> <name3> <ontology.obo> <go_terms.tab> <GO_id_list.csv> ",
       "<output_dir>; got ", length(Args), call. = FALSE)
}
input_file1 <- Args[1]
Name1 <- Args[2]
input_file2 <- Args[3]
Name2 <- Args[4]
input_file3 <- Args[5]
Name3 <- Args[6]
ontology_obo_input <- Args[7]
GOtermstab_file <- Args[8]
GO_ID_Arg <- Args[9]
subDir <- Args[10]
# recursive = TRUE so that a nested output path can be created in one call.
if (!file.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
outputpath <- subDir
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
# if (file.exists(outputpath_X1_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X1_rank))
# }
#
# if (file.exists(outputpath_X2_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X2_rank))
# }
#
#theme elements for plots
# Publication-style ggplot2 theme with the legend laid out horizontally below
# the plot. Starts from theme_foundation() and layers the overrides on top.
#   base_size   base font size passed to theme_foundation()
#   base_family base font family passed to theme_foundation()
# Returns a ggplot2 theme object that can be added to a plot with `+`.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  borderless <- element_rect(colour = NA)  # rectangles drawn without an outline
  pale_grey <- "#f0f0f0"                   # grid lines and facet strips
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = borderless,
    plot.background = borderless,
    panel.border = borderless,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = borderless,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to ggplot2::discrete_scale().
# manual_pal() is called through the scales namespace so the function neither
# depends on nor causes `scales` being attached (it used to call
# library(scales) as a side effect).
# NOTE: this function is defined a second time further down this file, after
# theme_Publication_legend_right(); the later definition is the one in effect.
scale_fill_Publication <- function(...){
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to ggplot2::discrete_scale().
# NOTE: also redefined further down this file; the later definition wins.
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Publication-style ggplot2 theme with a vertical legend to the right of the
# plot. Starts from theme_foundation() and layers the overrides on top.
#   base_size   base font size passed to theme_foundation()
#   base_family base font family passed to theme_foundation()
# Returns a ggplot2 theme object that can be added to a plot with `+`.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  borderless <- element_rect(colour = NA)  # rectangles drawn without an outline
  pale_grey <- "#f0f0f0"                   # grid lines and facet strips
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = borderless,
    plot.background = borderless,
    panel.border = borderless,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = borderless,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to ggplot2::discrete_scale().
# manual_pal() is called through the scales namespace: `scales` is not
# attached by the library() calls at the top of this script, so an unqualified
# manual_pal() would fail with "could not find function".
# NOTE: this replaces an earlier definition of the same name in this file.
scale_fill_Publication <- function(...){
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the same nine-colour "Publication" palette.
# `...` is forwarded to ggplot2::discrete_scale().
# NOTE: this replaces an earlier definition of the same name in this file.
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# ---- Load inputs ----
# Interaction Z-score tables for the three studies. The third study is stored
# in X4 because X3 holds the GO term list. A fourth and fifth study (X5, X6)
# are currently disabled throughout this script.
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
#X5 <- read.csv(file = input_file4,stringsAsFactors=FALSE,header = TRUE)
#X6 <- read.csv(file = input_file5,stringsAsFactors=FALSE,header = TRUE)

# Parent GO terms to plot. Column 1 becomes a full GO identifier ("GO:" plus
# the id zero-padded to 7 characters); column 2 has spaces and slashes replaced
# by underscores.
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
X3[,1] <- paste0("GO:", formatC(X3[,1], width = 7, flag = "0"))
X3[,2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[,2])

# Ontology parsed from the .obo file (www.geneontology.org/ontology/gene_ontology_edit.obo),
# following only "is_a" relationships.
Ontology <- get_ontology(file = ontology_obo_input,
                         propagate_relationships = "is_a",
                         extract_tags = "minimal")

# List form of org.Sc.sgdGO2ALLORFS; it is indexed by GO id below to get the
# ORFs associated with a term.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# GO term list (go_terms.tab): headerless, tab-delimited, quoting disabled.
Terms <- read.delim(file = GOtermstab_file,
                    header = FALSE,
                    quote = "",
                    col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition"))
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
# Add ORF, Score_L and Score_K columns to one interaction Z-score table.
#
#   scores  data frame with columns OrfRep, Z_lm_L and Z_lm_K
#
# Returns `scores` with three columns appended, in this order:
#   ORF      OrfRep with every "_1", "_2", "_3" and "_4" removed
#   Score_L  "No Growth" where Z_lm_L is NA, "Deletion Enhancer" where it is
#            >= 2, "Deletion Suppressor" where it is <= -2, else "No Effect"
#   Score_K  as Score_L but with the sign reversed: Z_lm_K >= 2 is a
#            "Deletion Suppressor" and Z_lm_K <= -2 a "Deletion Enhancer"
#
# Labels are assigned by indexing the column (scores$col[rows] <- value),
# which is a no-op when no row matches. The row-subset form
# scores[rows,]$col <- value stops with "replacement has 1 row, data has 0"
# whenever a category is empty (for example a table with no NA scores).
annotate_interaction_scores <- function(scores) {
  orf <- scores$OrfRep
  for (replicate_tag in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(replicate_tag, "", x = orf)
  }
  scores$ORF <- orf

  z_l <- scores$Z_lm_L
  scores$Score_L <- "No Effect"
  scores$Score_L[is.na(z_l)] <- "No Growth"
  scores$Score_L[!is.na(z_l) & z_l >= 2] <- "Deletion Enhancer"
  scores$Score_L[!is.na(z_l) & z_l <= -2] <- "Deletion Suppressor"

  z_k <- scores$Z_lm_K
  scores$Score_K <- "No Effect"
  scores$Score_K[is.na(z_k)] <- "No Growth"
  scores$Score_K[!is.na(z_k) & z_k >= 2] <- "Deletion Suppressor"
  scores$Score_K[!is.na(z_k) & z_k <= -2] <- "Deletion Enhancer"

  scores
}

X1 <- annotate_interaction_scores(X1)
X2 <- annotate_interaction_scores(X2)
X4 <- annotate_interaction_scores(X4)
#
# X5$ORF <- X5$OrfRep
# X5$ORF <- gsub("_1","",x=X5$ORF)
# X5$ORF <- gsub("_2","",x=X5$ORF)
# X5$ORF <- gsub("_3","",x=X5$ORF)
# X5$ORF <- gsub("_4","",x=X5$ORF)
#
# X5$Score_L <- "No Effect"
# X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth"
# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
#
# X5$Score_K <- "No Effect"
# X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth"
# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
# X6$ORF <- X6$OrfRep
# X6$ORF <- gsub("_1","",x=X6$ORF)
# X6$ORF <- gsub("_2","",x=X6$ORF)
# X6$ORF <- gsub("_3","",x=X6$ORF)
# X6$ORF <- gsub("_4","",x=X6$ORF)
#
# X6$Score_L <- "No Effect"
# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
# X6$Score_K <- "No Effect"
# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
# Express missing lm Z scores as 0.001 in X1, X2 and X4, for both K and L.
# The Z_Shift_* columns are deliberately left untouched.
# The column is indexed directly so that a table without any NA is simply left
# unchanged; X1[is.na(...),]$col <- 0.001 stops with "replacement has 1 row,
# data has 0" in that case.
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
#express the na data as 0.001 in X5
# X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
# X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
#express the na data as 0.001 in X6
# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
# For each study: rank the lm Z scores (rank 1 is the smallest value), sort
# the rows by OrfRep in increasing order, then tag every column name with the
# study suffix so the tables can be placed side by side.
# X5 and X6 are currently disabled; they would follow the same pattern.

# Study 1
X1$Rank_L <- rank(X1$Z_lm_L)
X1$Rank_K <- rank(X1$Z_lm_K)
X1 <- X1[order(X1$OrfRep), ]
names(X1) <- paste0(names(X1), "_X1")

# Study 2
X2$Rank_L <- rank(X2$Z_lm_L)
X2$Rank_K <- rank(X2$Z_lm_K)
X2 <- X2[order(X2$OrfRep), ]
names(X2) <- paste0(names(X2), "_X2")

# Study 3 (held in X4)
X4$Rank_L <- rank(X4$Z_lm_L)
X4$Rank_K <- rank(X4$Z_lm_K)
X4 <- X4[order(X4$OrfRep), ]
names(X4) <- paste0(names(X4), "_X4")
#colnames(X5) <- paste(colnames(X5),"_X5",sep="")
#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
# colnames(X1)[1] <- "OrfRep"
# colnames(X2)[1] <- "OrfRep"
# colnames(X4)[1] <- "OrfRep"
# colnames(X5)[1] <- "OrfRep"
# colnames(X6)[1] <- "OrfRep"
# Combine the three studies side by side. cbind() pairs rows purely by
# position, so this is only correct when the three (OrfRep-sorted) tables hold
# the same ORFs in the same order. Warn if they do not, instead of silently
# producing misaligned rows.
if (!identical(X1$OrfRep_X1, X2$OrfRep_X2) || !identical(X1$OrfRep_X1, X4$OrfRep_X4)) {
  warning("OrfRep columns of the three inputs differ; cbind() pairs rows by ",
          "position, so values from different ORFs may share a row",
          call. = FALSE)
}
X <- cbind(X1,X2,X4)

# ORF: study-1 OrfRep with every "_1", "_2", "_3" and "_4" removed.
X$ORF <- X$OrfRep_X1
for (replicate_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_tag, "", x = X$ORF)
}

# Where a gene name is blank, fall back to that study's OrfRep.
# NA gene names are excluded from the mask explicitly: comparing NA with ""
# yields NA, which made the former X[X$Gene == "",]$Gene <- ... assignment
# fail, and the surrounding try() hid that, leaving blank names in place.
for (study_suffix in c("_X1", "_X2", "_X4")) {
  gene_col <- paste0("Gene", study_suffix)
  orf_col <- paste0("OrfRep", study_suffix)
  if (!all(c(gene_col, orf_col) %in% names(X))) {
    next  # stay tolerant of inputs that lack a Gene column
  }
  blank_gene <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
  X[[gene_col]][blank_gene] <- X[[orf_col]][blank_gene]
}
#try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
#
# #express the na data as 0.001
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
#
# X$Overlap <- "No Effect"
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
#
# X$Overlap_K <- "No Effect"
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
#
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
# Build the table that feeds the heatmaps: ORF, the study-1 gene name, and the
# Z_Shift / Z_lm columns for K and L of every study. X5 names are listed as
# well but match nothing while that study is disabled.
z_columns <- as.vector(outer(c("Z_Shift_K", "Z_lm_K", "Z_Shift_L", "Z_lm_L"),
                             c("X1", "X2", "X4", "X5"),
                             paste, sep = "_"))
wanted_columns <- c("ORF", "Gene_X1", z_columns)
# Columns keep the order they have in X.
X_heatmap <- X[colnames(X) %in% wanted_columns]

# Reorder by position. This assumes exactly 14 columns were selected, with ORF
# last (it is appended to X after the studies are combined) and the gene name
# first -- the index vector depends on the column order of the input files, so
# verify it if the input format changes.
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)]

# Swap the internal study tags in the column names for the display names.
study_labels <- c(X1 = Name1, X2 = Name2, X4 = Name3)
for (study_tag in names(study_labels)) {
  colnames(X_heatmap) <- gsub(pattern = study_tag,
                              replacement = study_labels[[study_tag]],
                              colnames(X_heatmap))
}
colnames(X_heatmap)[2] <- "Gene"

# Colour breaks for the heatmaps; there is no break between -2 and 2, so all
# values in that interval share one colour bin.
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
for(s in 1:dim(X3)[1]){
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
#GO_ID_Arg <- "GO:0006325"
GO_ID_Arg_loop <- as.character(X3[s,1])
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if get_descendants() returned at most 100 terms for this parent (larger parents are skipped)
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 3 && Parent_Size <= 30){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size == 2){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
}

View File

@@ -0,0 +1,729 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library("gplots")
# Command-line interface: 12 positional arguments, all required.
#   1, 3, 5, 7  CSV files of interaction Z-scores, one per experiment (the
#               original notes call these "GTF results"). They are read with
#               read.csv() further down; the OrfRep, Gene, Z_lm_L and Z_lm_K
#               columns are used by this script.
#   2, 4, 6, 8  Short display names for experiments 1-4; they replace the
#               X1/X2/X4/X5 suffixes in the heatmap column labels.
#   9           Gene Ontology .obo file (gene_ontology_edit.obo from
#               www.geneontology.org), passed to get_ontology().
#   10          go_terms.tab file, read with read.delim().
#   11          CSV file read with read.csv(): the first column holds numeric
#               GO IDs, the second column the term names used for the output
#               PDF file names. NOTE(review): the variable name GO_ID_Arg
#               looks like a leftover from a single-GO-ID version - confirm.
#   12          Output directory (created if it does not exist). Output file
#               names are built with paste(outputpath, name, sep = ""), so
#               pass the directory with a trailing "/".
Args <- commandArgs(TRUE)
if (length(Args) < 12) {
  # Without this check a missing Arg 12 is NA and a directory literally named
  # "NA" gets created before the script fails somewhere else.
  stop("Expected 12 arguments: 4 x (Z-score CSV, name), ontology .obo, ",
       "go_terms.tab, GO ID CSV, output directory; got ", length(Args),
       call. = FALSE)
}
input_file1 <- Args[1]
Name1 <- Args[2]
input_file2 <- Args[3]
Name2 <- Args[4]
input_file3 <- Args[5]
Name3 <- Args[6]
input_file4 <- Args[7]
Name4 <- Args[8]
ontology_obo_input <- Args[9]
GOtermstab_file <- Args[10]
GO_ID_Arg <- Args[11]
subDir <- Args[12]
if (!dir.exists(subDir)) {
  # recursive = TRUE so that nested output paths can be created in one go
  dir.create(subDir, recursive = TRUE)
}
# The output path is the 12th argument, used verbatim as a file-name prefix.
outputpath <- subDir
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
# if (file.exists(outputpath_X1_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X1_rank))
# }
#
# if (file.exists(outputpath_X2_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X2_rank))
# }
#
#theme elements for plots
# Publication-style ggplot2 theme: theme_foundation() plus the overrides
# below, with a small horizontal legend placed underneath the plot.
#
# base_size:   base font size passed through to theme_foundation().
# base_family: base font family passed through to theme_foundation().
# Returns the combined theme object (foundation + overrides).
theme_Publication <- function(base_size=14, base_family="sans") {
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)
  bold_text <- element_text(face = "bold")

  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = bold_text
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded unchanged to ggplot2's discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() comes from the scales package. It is namespace-qualified here
  # instead of calling library(scales) inside the function, so the call works
  # without attaching a package as a side effect.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the nine-colour "Publication" palette.
# `...` is forwarded unchanged to ggplot2's discrete_scale().
scale_colour_Publication <- function(...){
  # manual_pal() comes from the scales package, which no library() call at
  # script level attaches; qualify it so the lookup cannot fail.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Publication-style ggplot2 theme: theme_foundation() plus the overrides
# below, with a vertical legend placed to the right of the plot.
#
# base_size:   base font size passed through to theme_foundation().
# base_family: base font family passed through to theme_foundation().
# Returns the combined theme object (foundation + overrides).
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)
  bold_text <- element_text(face = "bold")

  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = bold_text
  )
  foundation + overrides
}
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded unchanged to ggplot2's discrete_scale().
# This redefinition replaces the scale_fill_Publication defined earlier in
# the script, so it is the version that actually runs.
scale_fill_Publication <- function(...){
  # manual_pal() comes from the scales package, which no library() call at
  # script level attaches; qualify it so the lookup cannot fail.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
# Discrete colour scale using the nine-colour "Publication" palette.
# `...` is forwarded unchanged to ggplot2's discrete_scale().
# This redefinition replaces the scale_colour_Publication defined earlier in
# the script, so it is the version that actually runs.
scale_colour_Publication <- function(...){
  # manual_pal() comes from the scales package, which no library() call at
  # script level attaches; qualify it so the lookup cannot fail.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# Load every input table. All CSV inputs are read the same way: header row
# present, strings kept as character.
read_input_csv <- function(path) {
  read.csv(file = path, stringsAsFactors = FALSE, header = TRUE)
}

# Interaction Z-score tables for the four experiments. The numbering skips
# X3, which is used for the GO term table below.
X1 <- read_input_csv(input_file1)
X2 <- read_input_csv(input_file2)
X4 <- read_input_csv(input_file3)
X5 <- read_input_csv(input_file4)

# GO terms to plot: column 1 is turned into a zero-padded "GO:nnnnnnn" ID,
# column 2 gets spaces and slashes replaced by "_" (it is used in output PDF
# file names later in the script).
X3 <- read_input_csv(GO_ID_Arg)
X3[,1] <- paste0("GO:", formatC(X3[,1], width = 7, flag = "0"))
X3[,2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[,2])

# Ontology parsed from the .obo file (www.geneontology.org/ontology/gene_ontology_edit.obo),
# following "is_a" relationships only.
Ontology <- get_ontology(file = ontology_obo_input,
                         propagate_relationships = "is_a",
                         extract_tags = "minimal")

# GO ID -> all ORFs annotated to that term, from org.Sc.sgd.db.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# GO term list (go_terms.tab): headerless, tab-delimited, quoting disabled.
Terms <- read.delim(file = GOtermstab_file,
                    header = FALSE,
                    quote = "",
                    col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition"))
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
# Add three columns to an interaction Z-score table:
#   ORF      OrfRep with the replicate tags "_1" .. "_4" removed
#   Score_L  label derived from Z_lm_L
#   Score_K  label derived from Z_lm_K (enhancer/suppressor are assigned to
#            the opposite signs compared with Score_L)
# A missing Z-score is labelled "No Growth"; |Z| >= 2 is labelled as
# enhancer/suppressor; everything else stays "No Effect".
#
# Assignments use vector indexing (df$col[mask] <- value). The former
# df[mask, ]$col <- value form stops with "replacement has 1 row, data has 0"
# whenever the mask selects no rows, e.g. a data set without any NA or
# without any |Z| >= 2.
label_interaction_scores <- function(df) {
  orf <- df$OrfRep
  # Same sequential removals as before, one tag at a time.
  for (replicate_tag in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(replicate_tag, "", x = orf)
  }
  df$ORF <- orf

  df$Score_L <- "No Effect"
  df$Score_L[is.na(df$Z_lm_L)] <- "No Growth"
  df$Score_L[!is.na(df$Z_lm_L) & df$Z_lm_L >= 2] <- "Deletion Enhancer"
  df$Score_L[!is.na(df$Z_lm_L) & df$Z_lm_L <= -2] <- "Deletion Suppressor"

  df$Score_K <- "No Effect"
  df$Score_K[is.na(df$Z_lm_K)] <- "No Growth"
  df$Score_K[!is.na(df$Z_lm_K) & df$Z_lm_K >= 2] <- "Deletion Suppressor"
  df$Score_K[!is.na(df$Z_lm_K) & df$Z_lm_K <= -2] <- "Deletion Enhancer"

  df
}

X1 <- label_interaction_scores(X1)
X2 <- label_interaction_scores(X2)
X4 <- label_interaction_scores(X4)
X5 <- label_interaction_scores(X5)
# X6$ORF <- X6$OrfRep
# X6$ORF <- gsub("_1","",x=X6$ORF)
# X6$ORF <- gsub("_2","",x=X6$ORF)
# X6$ORF <- gsub("_3","",x=X6$ORF)
# X6$ORF <- gsub("_4","",x=X6$ORF)
#
# X6$Score_L <- "No Effect"
# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
# X6$Score_K <- "No Effect"
# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
# Express missing Z_lm_L / Z_lm_K values as 0.001 in every data set.
# Vector indexing is used so that a column without any NA is simply left
# unchanged; the former X[is.na(...), ]$col <- 0.001 form stops with
# "replacement has 1 row, data has 0" in that case.
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001
#express the na data as 0.001 in X6
# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
#X1$Rank_L <- rank(X1$Z_lm_L)
#X1$Rank_K <- rank(X1$Z_lm_K)
#X2$Rank_L <- rank(X2$Z_lm_L)
#X2$Rank_K <- rank(X2$Z_lm_K)
#X4$Rank_L <- rank(X4$Z_lm_L)
#X4$Rank_K <- rank(X4$Z_lm_K)
#X5$Rank_L <- rank(X5$Z_lm_L)
#X5$Rank_K <- rank(X5$Z_lm_K)
# X6$Rank_L <- rank(X6$Z_lm_L)
# X6$Rank_K <- rank(X6$Z_lm_K)
# Sort each data set by OrfRep (ascending).
X1 <- X1[order(X1$OrfRep), ]
X2 <- X2[order(X2$OrfRep), ]
X4 <- X4[order(X4$OrfRep), ]
X5 <- X5[order(X5$OrfRep), ]

# Tag every column with its data set so the merged table keeps them apart.
# The first column is named "OrfRep" without a tag because it is the merge
# key. NOTE(review): this assumes OrfRep is the first column of every input
# file - confirm against the files.
names(X1) <- c("OrfRep", paste0(names(X1)[-1], "_X1"))
names(X2) <- c("OrfRep", paste0(names(X2)[-1], "_X2"))
names(X4) <- c("OrfRep", paste0(names(X4)[-1], "_X4"))
names(X5) <- c("OrfRep", paste0(names(X5)[-1], "_X5"))
# colnames(X6)[1] <- "OrfRep"
#X <- cbind(X1,X2,X4,X5)
#print(dim(X))
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
#print(X[2700,])
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
# Full outer join of the four suffixed tables on OrfRep, folded left to
# right (X1 with X2, then X4, then X5).
X <- Reduce(
  function(left, right) merge(left, right, by = "OrfRep", all = TRUE),
  list(X1, X2, X4, X5)
)

# ORF starts as a copy of OrfRep; the replicate tags are stripped below.
X$ORF <- X$OrfRep
print(dim(X))
print(colnames(X))

# Remove the replicate tags "_1" .. "_4", one at a time.
for (replicate_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_tag, "", x = X$ORF)
}
# The outer join leaves NA wherever an OrfRep is absent from one of the data
# sets. Express those missing Z_lm values as 0.001.
# Vector indexing is used because the former X[is.na(...), ]$col <- 0.001
# form stops with "replacement has 1 row, data has 0" when a column has no
# NA - which is exactly what happens when all inputs share the same OrfRep
# set.
for (z_col in c("Z_lm_L_X1", "Z_lm_K_X1",
                "Z_lm_L_X2", "Z_lm_K_X2",
                "Z_lm_L_X4", "Z_lm_K_X4",
                "Z_lm_L_X5", "Z_lm_K_X5")) {
  X[[z_col]][is.na(X[[z_col]])] <- 0.001
}
#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001

# Gene names: a missing or empty gene name falls back to the OrfRep.
# The previous code wrapped each step in try() to hide the zero-row
# assignment error; with vector indexing no error is raised, so nothing has
# to be suppressed. A Gene_* column that is absent is skipped, as before.
for (gene_col in c("Gene_X1", "Gene_X2", "Gene_X4", "Gene_X5")) {
  if (!(gene_col %in% colnames(X))) {
    next
  }
  unnamed <- is.na(X[[gene_col]]) | X[[gene_col]] == ""
  X[[gene_col]][unnamed] <- X$OrfRep[unnamed]
}
#write.csv(X,paste(outputpath,"18_0614_all_3.csv",sep=""))
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
#
# #express the na data as 0.001
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
#
# X$Overlap <- "No Effect"
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
#
# X$Overlap_K <- "No Effect"
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
#
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
print(head(X))

# Dump the merged table and each suffixed per-experiment table.
write.csv(X, paste0(outputpath, "18_0614_all_2.csv"))
per_experiment <- list(X1 = X1, X2 = X2, X4 = X4, X5 = X5)
for (tag in names(per_experiment)) {
  write.csv(per_experiment[[tag]], paste0(outputpath, "18_0614_all_", tag, ".csv"))
}

# Keep only the columns needed for the heatmaps: ORF, the gene name from the
# first data set, and the K and L Z-scores (Z_Shift and Z_lm) of all four
# data sets. Columns stay in the order they have in X.
experiment_tags <- c("X1", "X2", "X4", "X5")
heatmap_columns <- c("ORF", "Gene_X1",
                     paste0("Z_Shift_K_", experiment_tags),
                     paste0("Z_lm_K_", experiment_tags),
                     paste0("Z_Shift_L_", experiment_tags),
                     paste0("Z_lm_L_", experiment_tags))
X_heatmap <- X[colnames(X) %in% heatmap_columns]
print(colnames(X_heatmap))

# Positional reorder of the 18 selected columns: column 18 and column 1
# first, then columns 3-4 of each experiment's group of four, then columns
# 1-2 of each group.
# NOTE(review): this depends on all 18 columns being present and on the
# column order of the input files - presumably ORF, Gene, then one Z-score
# family followed by the other; verify with the printed column names above.
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)]

# Replace the internal X1/X2/X4/X5 tags with the names given on the command
# line, one tag after the other.
display_names <- c(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4)
for (tag in names(display_names)) {
  colnames(X_heatmap) <- gsub(pattern = tag, replacement = display_names[[tag]], colnames(X_heatmap))
}
colnames(X_heatmap)[2] <- "Gene"

# 12 break points -> 11 colour bins; values between -2 and 2 share one bin.
colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))
write.csv(X_heatmap, paste0(outputpath, "18_0614_all.csv"))
#break()
for(s in 1:dim(X3)[1]){
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
#GO_ID_Arg <- "GO:0006325"
GO_ID_Arg_loop <- as.character(X3[s,1])
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if get_descendants() returned at most 100 terms for the parent term; larger parents are skipped
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 2 && Parent_Size <= 30){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
}

View File

@@ -0,0 +1,720 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library("gplots")
#build in command args to apply this code to a given !!results sheet
Args <- commandArgs(TRUE)
#All twelve positional arguments are required. Fail early with a usage
#message instead of carrying NA paths into file.exists()/dir.create()/read.csv().
if (length(Args) < 12) {
  stop("Expected 12 arguments: <results 1> <name 1> <results 2> <name 2> ",
       "<results 3> <name 3> <results 4> <name 4> <ontology .obo> ",
       "<go_terms.tab> <GO term CSV> <output directory>; got ",
       length(Args), call. = FALSE)
}
#Arg 1 is GTF results 1 (CSV, read below into X1)
input_file1 <- Args[1]
#Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
Name1 <- Args[2]
#Arg 3 is GTF results 2 (CSV, read below into X2)
input_file2 <- Args[3]
#Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
Name2 <- Args[4]
#Arg 5 is GTF results 3 (CSV, read below into X4)
input_file3 <- Args[5]
#Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
Name3 <- Args[6]
#Arg 7 is GTF results 4 (CSV, read below into X5)
input_file4 <- Args[7]
#Arg 8 is the name of Interaction score file (Zscores_Interaction.csv) 4 to print in the results
Name4 <- Args[8]
#Arg 9 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
ontology_obo_input <- Args[9]
#Arg 10 is the go_terms.tab file
GOtermstab_file <- Args[10]
#Arg 11 is a CSV file with a header row (it is passed to read.csv() below and
#stored in X3). Column 1 holds the GO ID as a bare number: the script adds the
#"GO:" prefix and zero-pads it to 7 digits, e.g. 6325 becomes GO:0006325
#(chromatin organization). Column 2 holds the term name, which is used to
#build the output PDF file names.
#Root terms for reference:
#biological process GO:0008150
#molecular function GO:0003674
#cellular component GO:0005575
GO_ID_Arg <- Args[11]
#Arg 12 is the directory to put the results into (created if needed, including
#any missing parent directories)
subDir <- Args[12]
if (!file.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
#define the output path (twelfth argument from Rscript).
#NOTE: output file names are built further down with
#paste(outputpath, <name>, ".pdf", sep = ""), so this value should end with a
#path separator for the files to land inside the directory.
outputpath <- subDir
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
# if (file.exists(outputpath_X1_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X1_rank))
# }
#
# if (file.exists(outputpath_X2_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X2_rank))
# }
#
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
  #Publication-style ggplot2 theme with the legend laid out horizontally
  #underneath the plot. base_size and base_family are handed straight to
  #ggthemes::theme_foundation(); everything else is a fixed override.
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)

  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    #text elements
    text = element_text(),
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.x = element_text(vjust = -0.2),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.text = element_text(),
    strip.text = element_text(face = "bold"),
    legend.title = element_text(face = "italic"),
    #backgrounds, borders and grid
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    #axis lines
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    #legend: bottom, horizontal, small keys
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    #outer margin
    plot.margin = unit(c(10, 5, 5, 5), "mm")
  )

  foundation + overrides
}
#Discrete fill scale using the fixed nine-colour "Publication" palette.
#... is forwarded unchanged to ggplot2's discrete_scale().
#NOTE: a function with this same name is defined again later in this file.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  #manual_pal() is referenced through its namespace instead of calling
  #library(scales) inside the function, so a call no longer changes the
  #search path as a side effect.
  discrete_scale("fill", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
#Discrete colour scale using the fixed nine-colour "Publication" palette.
#... is forwarded unchanged to ggplot2's discrete_scale().
#NOTE: a function with this same name is defined again later in this file.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  #scales is not attached at the top of this script, so manual_pal() is
  #referenced through its namespace; otherwise this only works after
  #something else has attached scales.
  discrete_scale("colour", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  #Publication-style ggplot2 theme with the legend stacked vertically to the
  #right of the plot. base_size and base_family are handed straight to
  #ggthemes::theme_foundation(); everything else is a fixed override.
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)

  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    #text elements
    text = element_text(),
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.x = element_text(vjust = -0.2),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.text = element_text(),
    strip.text = element_text(face = "bold"),
    legend.title = element_text(face = "italic"),
    #backgrounds, borders and grid
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    #axis lines
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    #legend: right-hand side, vertical, larger keys
    legend.key = no_outline,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    #outer margin
    plot.margin = unit(c(10, 5, 5, 5), "mm")
  )

  foundation + overrides
}
#Discrete fill scale using the fixed nine-colour "Publication" palette.
#... is forwarded unchanged to ggplot2's discrete_scale().
#This redefines scale_fill_Publication from earlier in the file; when the
#script runs top to bottom, this later definition replaces the earlier one.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  #scales is not attached at the top of this script, so manual_pal() must be
  #referenced through its namespace or the call fails with
  #"could not find function".
  discrete_scale("fill", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
#Discrete colour scale using the fixed nine-colour "Publication" palette.
#... is forwarded unchanged to ggplot2's discrete_scale().
#This redefines scale_colour_Publication from earlier in the file; when the
#script runs top to bottom, this later definition replaces the earlier one.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  #scales is not attached at the top of this script, so manual_pal() must be
  #referenced through its namespace or the call fails with
  #"could not find function".
  discrete_scale("colour", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
#Read the four interaction-score tables. The variable names skip X3 on
#purpose: X3 is reserved for the GO term list read just below.
read_input_csv <- function(path) {
  read.csv(file = path, stringsAsFactors = FALSE, header = TRUE)
}
X1 <- read_input_csv(input_file1)
X2 <- read_input_csv(input_file2)
X4 <- read_input_csv(input_file3)
X5 <- read_input_csv(input_file4)

#GO terms to plot. Column 1 is turned into a "GO:" identifier zero-padded to
#7 digits; column 2 has every space and "/" replaced by "_" because it is
#later pasted into output file names.
X3 <- read_input_csv(GO_ID_Arg)
X3[[1]] <- paste0("GO:", formatC(X3[[1]], width = 7, flag = "0"))
X3[[2]] <- gsub(pattern = "[ /]", replacement = "_", x = X3[[2]])

#Ontology parsed from the www.geneontology.org/ontology/gene_ontology_edit.obo file
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

#all ORFs associated with each GO term, as a list taken from org.Sc.sgd.db
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

#Terms is the GO term list (go_terms.tab); the file has no header row, so the
#four column names are supplied here
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
)
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#Add the derived columns used downstream to one interaction-score table.
#
#scores     data frame with (at least) the columns OrfRep, Z_lm_L and Z_lm_K
#threshold  cutoff applied to the Z scores: values >= threshold or
#           <= -threshold are labelled as an effect (default 2)
#na_value   number substituted for missing Z_lm_L / Z_lm_K values after the
#           labels have been assigned (default 0.001)
#
#Returns scores with the columns ORF, Score_L, Score_K, Rank_L and Rank_K
#appended in that order, and with NA values in Z_lm_L / Z_lm_K replaced by
#na_value.
#
#Labels are assigned through vector indexing (labels[mask] <- value). The
#previous form, df[mask,]$col <- value, stops with "replacement has 1 row,
#data has 0" whenever the mask selects no rows, e.g. for a dataset without
#any NA scores or without any score beyond the threshold.
annotate_interaction_scores <- function(scores, threshold = 2, na_value = 0.001) {
  required_cols <- c("OrfRep", "Z_lm_L", "Z_lm_K")
  absent <- setdiff(required_cols, colnames(scores))
  if (length(absent) > 0) {
    stop("interaction score table is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  #NA -> "No Growth"; beyond the threshold -> the supplied labels; otherwise
  #"No Effect"
  label_scores <- function(z, at_or_above, at_or_below) {
    labels <- rep("No Effect", length(z))
    labels[is.na(z)] <- "No Growth"
    labels[!is.na(z) & z >= threshold] <- at_or_above
    labels[!is.na(z) & z <= -threshold] <- at_or_below
    labels
  }

  #ORF is OrfRep with the replicate tags _1 ... _4 removed (same four
  #sequential substitutions as before)
  orf <- scores$OrfRep
  for (replicate_tag in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(replicate_tag, "", x = orf)
  }
  scores$ORF <- orf

  #The label direction is mirrored between L and K: a high Z_lm_L is a
  #"Deletion Enhancer", a high Z_lm_K is a "Deletion Suppressor"
  scores$Score_L <- label_scores(scores$Z_lm_L,
                                 "Deletion Enhancer", "Deletion Suppressor")
  scores$Score_K <- label_scores(scores$Z_lm_K,
                                 "Deletion Suppressor", "Deletion Enhancer")

  #express the na data as na_value (0.001 by default) for K and L; this must
  #happen after labelling so that "No Growth" is still detectable above
  scores$Z_lm_L[is.na(scores$Z_lm_L)] <- na_value
  scores$Z_lm_K[is.na(scores$Z_lm_K)] <- na_value

  #ranks are computed on the NA-replaced scores
  scores$Rank_L <- rank(scores$Z_lm_L)
  scores$Rank_K <- rank(scores$Z_lm_K)
  scores
}

#Every dataset goes through the same preparation; a further dataset can be
#added by reading it and passing it through the same helper.
X1 <- annotate_interaction_scores(X1)
X2 <- annotate_interaction_scores(X2)
X4 <- annotate_interaction_scores(X4)
X5 <- annotate_interaction_scores(X5)
#Sort every dataset by OrfRep so that rows line up by position
X1 <- X1[order(X1$OrfRep, decreasing = FALSE), ]
X2 <- X2[order(X2$OrfRep, decreasing = FALSE), ]
X4 <- X4[order(X4$OrfRep, decreasing = FALSE), ]
X5 <- X5[order(X5$OrfRep, decreasing = FALSE), ]

#cbind() below pairs rows purely by position and does not look at OrfRep.
#If the four tables do not contain exactly the same OrfRep values, scores
#from one gene would be shown next to another gene's name, so say so loudly.
reference_orfs <- as.character(X1$OrfRep)
orfs_match <- vapply(
  list(X2$OrfRep, X4$OrfRep, X5$OrfRep),
  function(ids) identical(as.character(ids), reference_orfs),
  logical(1)
)
if (!all(orfs_match)) {
  warning("OrfRep values differ between the input tables; rows combined by ",
          "position will not refer to the same gene", call. = FALSE)
}

#Tag every column with its dataset so the names stay unique after cbind();
#this includes the key column, which becomes OrfRep_X1, OrfRep_X2, ...
colnames(X1) <- paste0(colnames(X1), "_X1")
colnames(X2) <- paste0(colnames(X2), "_X2")
colnames(X4) <- paste0(colnames(X4), "_X4")
colnames(X5) <- paste0(colnames(X5), "_X5")

X <- cbind(X1, X2, X4, X5)
#print(dim(X))
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
#print(X[2700,])
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
#ORF for the combined table: OrfRep of the first dataset with the replicate
#tags _1 ... _4 removed (same four sequential substitutions as before)
X$ORF <- X$OrfRep_X1
for (replicate_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_tag, "", x = X$ORF)
}

#Where a dataset has an empty gene name, fall back to its OrfRep.
#This uses an NA-safe mask and plain vector assignment. The previous form,
#try(X[X$Gene == "",]$Gene <- ...), fails as soon as the Gene column contains
#an NA, and try() hid that failure, leaving every blank name unfilled.
for (dataset_tag in c("X1", "X2", "X4", "X5")) {
  gene_col <- paste0("Gene_", dataset_tag)
  orf_col <- paste0("OrfRep_", dataset_tag)
  #a dataset without a Gene column is skipped, as the old try() effectively did
  if (!all(c(gene_col, orf_col) %in% colnames(X))) {
    next
  }
  blank_gene <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
  if (any(blank_gene)) {
    X[[gene_col]][blank_gene] <- X[[orf_col]][blank_gene]
  }
}
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
#
# #express the na data as 0.001
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
#
# X$Overlap <- "No Effect"
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
#
# X$Overlap_K <- "No Effect"
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
#
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
#Build the table that feeds the heatmaps: ORF, the gene name from the first
#dataset, and the Z_Shift / Z_lm columns (K and L) of all four datasets.
X_heatmap <- local({
  dataset_tags <- c("X1", "X2", "X4", "X5")
  score_stems <- c("Z_Shift_K", "Z_lm_K", "Z_Shift_L", "Z_lm_L")
  wanted <- c("ORF", "Gene_X1",
              as.vector(outer(score_stems, dataset_tags, paste, sep = "_")))

  #keep the wanted columns in the order they occur in X
  picked <- X[colnames(X) %in% wanted]

  #Positional reordering of the 18 kept columns: 18 and 1 come first (ORF is
  #the last column of X, Gene_X1 the first kept one), then column pairs
  #4-5, 8-9, 12-13, 16-17, then pairs 2-3, 6-7, 10-11, 14-15.
  #NOTE(review): which score columns each pair holds depends on the column
  #order of the input files - confirm against a real input.
  column_order <- c(18, 1,
                    4, 5, 8, 9, 12, 13, 16, 17,
                    2, 3, 6, 7, 10, 11, 14, 15)
  picked <- picked[, column_order]

  #swap the internal dataset tags for the display names given on the command
  #line, one tag after the other
  display_names <- c(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4)
  labels <- colnames(picked)
  for (tag in names(display_names)) {
    labels <- gsub(pattern = tag, replacement = display_names[[tag]], labels)
  }
  labels[2] <- "Gene"
  colnames(picked) <- labels
  picked
})

#12 break points (-12 ... -2, 2 ... 12 in steps of 2) giving 11 colour bins;
#the single bin from -2 to 2 is the widest
colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))
for(s in 1:dim(X3)[1]){
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
#GO_ID_Arg <- "GO:0006325"
GO_ID_Arg_loop <- as.character(X3[s,1])
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if get_descendants() returned at most 100 terms for the parent (larger parents are skipped)
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 3 && Parent_Size <= 30){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size == 2){
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
#print(X0)
if(dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
}

View File

@@ -0,0 +1,754 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library(gplots)
#build in command args to apply this code to a given !!results sheet
# Positional command-line arguments passed after the script name.
Args <- commandArgs(TRUE)
# Stop early with a readable message when arguments are missing; otherwise the
# missing values propagate as NA into file and directory names and only fail
# further down.
if (length(Args) < 14) {
  stop(
    "Expected 14 arguments: five <interaction CSV> <label> pairs, the ontology ",
    ".obo file, the go_terms.tab file, the GO term list CSV and the output ",
    "directory; got ", length(Args), ".",
    call. = FALSE
  )
}
# Args 1-10 are five (file, label) pairs. Each file is read with read.csv()
# below and must provide the OrfRep, Gene, Z_lm_K, Z_lm_L, Z_Shift_K and
# Z_Shift_L columns this script uses. Each label replaces the study's internal
# tag (X1, X2, X4, X5, X6) in the heatmap column names.
#Arg 1 is the interaction score file 1
input_file1 <- Args[1]
#Arg 2 is the label for file 1
Name1 <- Args[2]
#Arg 3 is the interaction score file 2
input_file2 <- Args[3]
#Arg 4 is the label for file 2
Name2 <- Args[4]
#Arg 5 is the interaction score file 3
input_file3 <- Args[5]
#Arg 6 is the label for file 3
Name3 <- Args[6]
#Arg 7 is the interaction score file 4
input_file4 <- Args[7]
#Arg 8 is the label for file 4
Name4 <- Args[8]
#Arg 9 is the interaction score file 5
input_file5 <- Args[9]
#Arg 10 is the label for file 5
Name5 <- Args[10]
#Arg 11 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
ontology_obo_input <- Args[11]
#Arg 12 is the go_terms.tab file
GOtermstab_file <- Args[12]
#Arg 13 is a CSV file (with header) listing the parent GO terms to plot:
#column 1 is the numeric GO id (it is zero-padded to 7 digits and prefixed
#with "GO:" below), column 2 is the term name used as the PDF file name.
GO_ID_Arg <- Args[13]
#Arg 14 is the directory to put the results into (created if needed).
#PDF names are appended to it without a separator, so it should end in "/".
subDir <- Args[14]
if (!dir.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
outputpath <- subDir
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
# if (file.exists(outputpath_X1_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X1_rank))
# }
#
# if (file.exists(outputpath_X2_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X2_rank))
# }
#
#theme elements for plots
# Publication-style ggplot2 theme with the legend laid out horizontally under
# the plot. Starts from ggthemes' theme_foundation() and overrides titles,
# axes, grid, legend and facet strips.
#   base_size   - base font size handed to theme_foundation()
#   base_family - base font family handed to theme_foundation()
# Returns the combined theme object.
theme_Publication <- function(base_size=14, base_family="sans") {
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )

  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  (foundation + overrides)
}
# Discrete fill scale built on the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
# manual_pal() is called through the scales namespace so the function neither
# attaches a package as a side effect nor depends on scales being attached.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}
# Discrete colour scale built on the same nine-colour palette.
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
# Variant of theme_Publication() with a vertical legend on the right of the plot.
# Every other setting is inherited from theme_Publication(); only the legend
# position, direction and key size are overridden, so the two themes cannot
# drift apart.
#   base_size   - base font size, passed through to theme_Publication()
#   base_family - base font family, passed through to theme_Publication()
# Returns the combined theme object.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  (theme_Publication(base_size = base_size, base_family = base_family)
   + theme(legend.position = "right",
           legend.direction = "vertical",
           legend.key.size = unit(0.5, "cm")))
}
# Discrete fill scale built on the nine-colour "Publication" palette.
# This re-declares a function of the same name defined earlier in the script;
# being later, this is the definition in effect once the script has run.
# `...` is forwarded to discrete_scale(). manual_pal() is called through the
# scales namespace because scales is not attached by the script's library()
# calls.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}
# Discrete colour scale built on the same palette (also a re-declaration).
# `...` is forwarded to discrete_scale().
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# Load the five interaction score tables. The internal tags are X1, X2, X4, X5
# and X6; X3 is reserved for the list of parent GO terms read from GO_ID_Arg.
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
X5 <- read.csv(input_file4, header = TRUE, stringsAsFactors = FALSE)
X6 <- read.csv(input_file5, header = TRUE, stringsAsFactors = FALSE)
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
# Column 1: turn the GO id into the canonical "GO:" + 7-digit zero-padded form.
padded_go_id <- formatC(X3[[1]], width = 7, flag = "0")
X3[[1]] <- paste0("GO:", padded_go_id)
# Column 2: the term name becomes a file name, so spaces and slashes are
# replaced with underscores.
X3[[2]] <- gsub(pattern = "[ /]", replacement = "_", x = X3[[2]])
#Name1 <- "DOXO_HLD"
#Name2 <- "DOXO_HLEG"
#www.geneontology.org/ontology/gene_ontology_edit.obo file
# Parse the Gene Ontology .obo file (www.geneontology.org), propagating only
# "is_a" relationships and extracting the minimal tag set.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)
# org.Sc.sgd.db mapping converted to a plain list; it is indexed by GO id
# further down to obtain the ORFs associated with a term.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
# Terms is the GO term list (go_terms.tab): headerless, tab-delimited, quotes
# taken literally.
go_term_columns <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
# Label a vector of z-scores.
#   z          - numeric z-scores (may contain NA)
#   high_label - label for z >= threshold
#   low_label  - label for z <= -threshold
#   threshold  - absolute z-score cutoff
# Returns a character vector as long as z: "No Growth" where z is NA, the
# high/low label beyond the cutoff, "No Effect" otherwise.
classify_z_scores <- function(z, high_label, low_label, threshold) {
  labels <- rep("No Effect", length(z))
  labels[is.na(z)] <- "No Growth"
  labels[!is.na(z) & z >= threshold] <- high_label
  labels[!is.na(z) & z <= -threshold] <- low_label
  labels
}

# Add the derived columns ORF, Score_L, Score_K, Rank_L and Rank_K (appended in
# that order) to one interaction score table and replace missing Z_lm_L /
# Z_lm_K values with `na_fill`.
#   scores    - data frame with OrfRep, Z_lm_L and Z_lm_K columns
#   threshold - absolute z-score cutoff for calling an enhancer/suppressor
#   na_fill   - value substituted for NA z-scores before ranking
# Returns the augmented data frame.
annotate_interaction_scores <- function(scores, threshold = 2, na_fill = 0.001) {
  # ORF is OrfRep with the replicate suffixes _1 .. _4 removed.
  orf <- scores$OrfRep
  for (replicate_suffix in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(replicate_suffix, "", x = orf)
  }
  scores$ORF <- orf

  # The labels are mirrored between L and K: a high Z_lm_L is an enhancer,
  # whereas a high Z_lm_K is a suppressor.
  scores$Score_L <- classify_z_scores(scores$Z_lm_L, "Deletion Enhancer",
                                      "Deletion Suppressor", threshold)
  scores$Score_K <- classify_z_scores(scores$Z_lm_K, "Deletion Suppressor",
                                      "Deletion Enhancer", threshold)

  # Express the NA data as na_fill. Vector-level assignment is used because the
  # row-subset form `scores[is.na(z), ]$col <- value` raises an error when no
  # row is NA, which would abort the script on a table without missing scores.
  scores$Z_lm_L[is.na(scores$Z_lm_L)] <- na_fill
  scores$Z_lm_K[is.na(scores$Z_lm_K)] <- na_fill

  # Ranks are computed after the fill so every row receives a rank.
  scores$Rank_L <- rank(scores$Z_lm_L)
  scores$Rank_K <- rank(scores$Z_lm_K)
  scores
}

X1 <- annotate_interaction_scores(X1)
X2 <- annotate_interaction_scores(X2)
X4 <- annotate_interaction_scores(X4)
X5 <- annotate_interaction_scores(X5)
X6 <- annotate_interaction_scores(X6)
# For each study: sort rows by OrfRep (ascending), then tag every column name
# with the study suffix so the tables can sit side by side.
X1 <- X1[order(X1$OrfRep), ]
names(X1) <- paste0(names(X1), "_X1")

X2 <- X2[order(X2$OrfRep), ]
names(X2) <- paste0(names(X2), "_X2")

X4 <- X4[order(X4$OrfRep), ]
names(X4) <- paste0(names(X4), "_X4")

X5 <- X5[order(X5$OrfRep), ]
names(X5) <- paste0(names(X5), "_X5")

X6 <- X6[order(X6$OrfRep), ]
names(X6) <- paste0(names(X6), "_X6")

# cbind() pairs rows purely by position, so the combined table is only
# meaningful when all five files contain the same set of OrfRep values.
X <- cbind(X1, X2, X4, X5, X6)
#print(dim(X))
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
#print(X[2700,])
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
# ORF key for the combined table: the first study's OrfRep with the replicate
# suffixes _1 .. _4 removed.
combined_orf <- X$OrfRep_X1
for (replicate_suffix in c("_1", "_2", "_3", "_4")) {
  combined_orf <- gsub(replicate_suffix, "", x = combined_orf)
}
X$ORF <- combined_orf

# Where a study has an empty gene name, fall back to that study's OrfRep.
# which() drops NA comparisons, so an NA gene name is left untouched instead of
# raising an error that silently cancels the fill for the whole study.
for (study_tag in c("X1", "X2", "X4", "X5", "X6")) {
  gene_column <- paste0("Gene_", study_tag)
  orf_column <- paste0("OrfRep_", study_tag)
  blank_rows <- which(X[[gene_column]] == "")
  if (length(blank_rows) > 0 && !is.null(X[[orf_column]])) {
    X[[gene_column]][blank_rows] <- X[[orf_column]][blank_rows]
  }
}
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
#
# #express the na data as 0.001
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
#
# X$Overlap <- "No Effect"
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
#
# X$Overlap_K <- "No Effect"
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
#
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
# Build the table plotted in the heatmaps: the ORF key, the first study's gene
# name and the four z-score columns of each of the five studies.
study_tags <- c("X1", "X2", "X4", "X5", "X6")
score_fields <- c("Z_Shift_K", "Z_lm_K", "Z_Shift_L", "Z_lm_L")
heatmap_columns <- c(
  "ORF", "Gene_X1",
  paste(rep(score_fields, times = length(study_tags)),
        rep(study_tags, each = length(score_fields)),
        sep = "_")
)
# Logical selection keeps the columns in the order they have in X.
X_heatmap <- X[colnames(X) %in% heatmap_columns]

# Fixed positional reorder: ORF (22) and Gene (1) first, then the 3rd/4th score
# column of every study, then the 1st/2nd. Which scores those are depends on
# the column order of the input files - NOTE(review): confirm the inputs list
# their z-score columns in the expected order.
key_positions <- c(22, 1)
second_pair_positions <- c(4, 5, 8, 9, 12, 13, 16, 17, 20, 21)
first_pair_positions <- c(2, 3, 6, 7, 10, 11, 14, 15, 18, 19)
X_heatmap <- X_heatmap[, c(key_positions, second_pair_positions, first_pair_positions)]

# Swap the internal study tags for the user-supplied labels, one tag at a time.
study_labels <- list(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4, X6 = Name5)
for (tag in names(study_labels)) {
  colnames(X_heatmap) <- gsub(pattern = tag, replacement = study_labels[[tag]], colnames(X_heatmap))
}
colnames(X_heatmap)[2] <- "Gene"

# Twelve break points for the colour scale.
colormapbreaks <- c(-12, -10, -8, -6, -4, -2, 2, 4, 6, 8, 10, 12)
# Page layout for one parent GO term, chosen from the number of ORFs annotated
# to it. Tiers are (lower, upper]: <=30, 31-60, 61-100, 101-200, 201-500,
# 501-1000, 1001-2000, >2000. A size sitting exactly on a boundary gets the
# smaller tier.
#   parent_size - number of ORFs annotated to the parent term
# Returns a list with
#   height           - PDF page height in inches
#   cex_row          - row label size for heatmap.2
#   plot_small_terms - TRUE when terms with only 1-2 genes are also drawn
heatmap_layout <- function(parent_size) {
  tier <- findInterval(parent_size, c(30, 60, 100, 200, 500, 1000, 2000),
                       left.open = TRUE) + 1
  list(
    height = c(7, 10, 15, 20, 25, 30, 35, 45)[tier],
    cex_row = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)[tier],
    plot_small_terms = parent_size <= 60
  )
}

# Draw one heatmap page for the genes annotated to a single GO term.
#   term_rows  - rows of the heatmap table for this term; column 2 ("Gene")
#                labels the rows, columns 3.. are the plotted values
#   title      - page title (GO term name)
#   cex_row    - row label size
#   dendrogram - "row" to draw the row dendrogram, "none" to omit it
#   breaks     - colour scale break points
# try() keeps a term that heatmap.2 cannot draw from aborting the whole PDF.
draw_term_heatmap <- function(term_rows, title, cex_row, dendrogram, breaks) {
  values <- as.matrix(term_rows[, 3:ncol(term_rows)])
  try(heatmap.2(x = values,
                Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                dendrogram = dendrogram, cexCol = 0.7, cexRow = cex_row, scale = "none",
                breaks = breaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
                ylab = "Gene",
                cellnote = round(values, digits = 0), notecex = 0.5, key = TRUE,
                keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
                na.color = "red", col = brewer.pal(11, "PuOr"),
                main = title,
                labRow = as.character(term_rows$Gene)))
}

# Write one multi-page PDF holding a heatmap for each GO term in `term_ids`.
#   pdf_path      - output file
#   term_ids      - GO ids ("GO:nnnnnnn") to plot, one page each
#   layout        - result of heatmap_layout() for the parent term
#   heatmap_table - table with ORF, Gene and the score columns
#   go_to_orfs    - list indexed by GO id giving the annotated ORFs
#   term_table    - GO term table with integer GO_ID and GO_Term columns
#   breaks        - colour scale break points
# The PDF device is closed on exit even if an error interrupts plotting.
plot_go_term_family <- function(pdf_path, term_ids, layout, heatmap_table,
                                go_to_orfs, term_table, breaks) {
  pdf(file = pdf_path, width = 12, height = layout$height, onefile = TRUE)
  on.exit(dev.off(), add = TRUE)
  for (i in seq_along(term_ids)) {
    GO_Term <- term_ids[i]
    # The term table stores GO ids as integers, so strip the "GO:" prefix.
    GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
    GO_Term_Name <- as.character(term_table[term_table$GO_ID == GO_Term_Num, ]$GO_Term)
    All_Genes_Annotated_to_Term <- as.vector(go_to_orfs[GO_Term][[1]])
    Genes_Annotated_to_Term <- heatmap_table[heatmap_table$ORF %in% All_Genes_Annotated_to_Term, ]
    n_genes <- nrow(Genes_Annotated_to_Term)
    if (n_genes > 2) {
      draw_term_heatmap(Genes_Annotated_to_Term, GO_Term_Name, layout$cex_row, "row", breaks)
    } else if (n_genes > 0 && layout$plot_small_terms) {
      # One or two genes: drawn without a dendrogram, small parent terms only.
      draw_term_heatmap(Genes_Annotated_to_Term, GO_Term_Name, layout$cex_row, "none", breaks)
    }
  }
  invisible(pdf_path)
}

# One PDF per parent GO term listed in X3 (column 1: GO id, column 2: name).
for (s in seq_len(nrow(X3))) {
  GO_ID_Arg_loop <- as.character(X3[s, 1])
  GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)
  # Only make plots if get_descendants() returned at most 100 terms.
  if (length(GOTerm_parent) > 100) {
    next
  }
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  # Skip parent terms with fewer than two annotated ORFs.
  if (Parent_Size < 2) {
    next
  }
  # outputpath and the term name are concatenated without a separator.
  plot_go_term_family(
    pdf_path = paste(outputpath, X3[s, 2], ".pdf", sep = ""),
    term_ids = GOTerm_parent,
    layout = heatmap_layout(Parent_Size),
    heatmap_table = X_heatmap,
    go_to_orfs = GO2ALLORFs,
    term_table = Terms,
    breaks = colormapbreaks
  )
}

View File

@@ -0,0 +1,622 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library(gplots)
# Command-line arguments ----
# Usage:
#   Rscript <this script> <scores1.csv> <name1> <scores2.csv> <name2> \
#     <gene_ontology_edit.obo> <go_terms.tab> <go_term_table.csv> <output_dir/>
Args <- commandArgs(TRUE)
# Fail early: a missing argument would otherwise become NA and surface later
# as a confusing file error (or as a directory literally named "NA").
if (length(Args) < 8) {
  stop(
    "Expected 8 arguments: <scores1.csv> <name1> <scores2.csv> <name2> ",
    "<ontology.obo> <go_terms.tab> <go_term_table.csv> <output_dir>",
    call. = FALSE
  )
}
# Arg 1: first interaction-score table (CSV). The script later reads the
# OrfRep, Z_lm_L and Z_lm_K columns from it.
input_file1 <- Args[1]
# Arg 2: display name for the first table; it is substituted into overlap
# labels and heatmap column names.
Name1 <- Args[2]
# Arg 3: second interaction-score table (CSV), same layout as Arg 1.
input_file2 <- Args[3]
# Arg 4: display name for the second table.
Name2 <- Args[4]
# Arg 5: the www.geneontology.org/ontology/gene_ontology_edit.obo file.
ontology_obo_input <- Args[5]
# Arg 6: the go_terms.tab file.
GOtermstab_file <- Args[6]
# Arg 7: despite the variable name, this is a CSV file that is read further
# down. Column 1 holds numeric GO IDs (e.g. 6325 for GO:0006325, chromatin
# organization) and column 2 holds the term names used as PDF file names.
GO_ID_Arg <- Args[7]
# Arg 8: directory for the results; it is created if it does not exist yet.
subDir <- Args[8]
if (!dir.exists(subDir)) {
  dir.create(subDir, recursive = TRUE, showWarnings = FALSE)
}
# NOTE(review): outputpath is later pasted directly in front of each file
# name, so it is expected to end with a path separator - confirm with callers.
outputpath <- subDir
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
# if (file.exists(outputpath_X1_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X1_rank))
# }
#
# if (file.exists(outputpath_X2_rank)){
# #outputpath <- subDir
# } else {
# dir.create(file.path(outputpath_X2_rank))
# }
#
# Plot styling helpers ----

# Colour cycle shared by the "Publication" fill and colour scales.
publication_palette <- c(
  "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
  "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)

# Build the shared "Publication" theme on top of theme_foundation().
# The two public themes below differ only in the three legend settings that
# are passed in here; every other element is identical.
#   base_size, base_family : forwarded to theme_foundation()
#   legend_position        : value for legend.position ("bottom" / "right")
#   legend_direction       : value for legend.direction
#   legend_key_cm          : legend key size in centimetres
build_publication_theme <- function(base_size, base_family, legend_position,
                                    legend_direction, legend_key_cm) {
  theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
      text = element_text(),
      panel.background = element_rect(colour = NA),
      plot.background = element_rect(colour = NA),
      panel.border = element_rect(colour = NA),
      axis.title = element_text(face = "bold", size = rel(1)),
      axis.title.y = element_text(angle = 90, vjust = 2),
      axis.title.x = element_text(vjust = -0.2),
      axis.text = element_text(),
      axis.line = element_line(colour = "black"),
      axis.ticks = element_line(),
      panel.grid.major = element_line(colour = "#f0f0f0"),
      panel.grid.minor = element_blank(),
      legend.key = element_rect(colour = NA),
      legend.position = legend_position,
      legend.direction = legend_direction,
      legend.key.size = unit(legend_key_cm, "cm"),
      legend.spacing = unit(0, "cm"),
      legend.title = element_text(face = "italic"),
      plot.margin = unit(c(10, 5, 5, 5), "mm"),
      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      strip.text = element_text(face = "bold")
    )
}

# Publication theme with a compact horizontal legend below the plot.
theme_Publication <- function(base_size = 14, base_family = "sans") {
  build_publication_theme(
    base_size, base_family,
    legend_position = "bottom",
    legend_direction = "horizontal",
    legend_key_cm = 0.2
  )
}

# Publication theme with a vertical legend to the right of the plot.
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
  build_publication_theme(
    base_size, base_family,
    legend_position = "right",
    legend_direction = "vertical",
    legend_key_cm = 0.5
  )
}

# Discrete fill scale using the Publication palette; `...` is forwarded to
# discrete_scale(). manual_pal() is called through its namespace so the
# function works without attaching the scales package as a side effect.
scale_fill_Publication <- function(...) {
  discrete_scale(
    "fill", "Publication",
    scales::manual_pal(values = publication_palette),
    ...
  )
}

# Discrete colour scale using the Publication palette; `...` is forwarded to
# discrete_scale().
scale_colour_Publication <- function(...) {
  discrete_scale(
    "colour", "Publication",
    scales::manual_pal(values = publication_palette),
    ...
  )
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
# Input tables ----
# The two interaction-score tables that are compared against each other.
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)

# Table of GO terms to plot: column 1 is turned into a zero-padded "GO:" ID,
# column 2 has blanks and slashes replaced by underscores (it is used further
# down as a PDF file name).
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
go_numbers <- formatC(X3[, 1], width = 7, flag = "0")
X3[, 1] <- paste0("GO:", go_numbers)
X3[, 2] <- gsub("[ /]", "_", X3[, 2])

# Ontology graph from the www.geneontology.org/ontology/gene_ontology_edit.obo
# file.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# All ORFs associated with each GO term, taken from org.Sc.sgd.db.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# Terms is the GO term list (go_terms.tab); the file has no header row.
go_term_columns <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(
  GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
# Per-table score annotation ----

# Annotate one interaction-score table.
#
# Adds, in this order, the columns ORF, Score_L, Score_K, Rank_L and Rank_K,
# and replaces missing Z_lm_L / Z_lm_K values with the sentinel 0.001.
#
#   scores : data frame with columns OrfRep, Z_lm_L and Z_lm_K
# Returns the annotated data frame.
#
# All assignments index the column vector (scores$col[mask] <- value) rather
# than the row subset (scores[mask, ]$col <- value); the latter raises an
# error whenever the mask selects no rows, e.g. for a table without any NA
# Z-scores or without any score at or beyond +/-2.
annotate_interaction_scores <- function(scores) {
  # Strip the replicate tags from OrfRep. The tags are removed wherever they
  # occur in the name (the match is not anchored to the end).
  orf <- scores$OrfRep
  for (replicate_tag in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(replicate_tag, "", orf, fixed = TRUE)
  }
  scores$ORF <- orf

  # Label a Z-score vector: NA -> "No Growth", >= 2 -> label_high,
  # <= -2 -> label_low, everything else -> "No Effect".
  classify_z <- function(z, label_high, label_low) {
    label <- rep("No Effect", length(z))
    label[is.na(z)] <- "No Growth"
    label[!is.na(z) & z >= 2] <- label_high
    label[!is.na(z) & z <= -2] <- label_low
    label
  }

  # The enhancer/suppressor labels are mirrored between L and K.
  scores$Score_L <- classify_z(
    scores$Z_lm_L, "Deletion Enhancer", "Deletion Suppressor"
  )
  scores$Score_K <- classify_z(
    scores$Z_lm_K, "Deletion Suppressor", "Deletion Enhancer"
  )

  # Express the NA data as 0.001 for K and L (done after labelling so that
  # "No Growth" is still detected above).
  scores$Z_lm_L[is.na(scores$Z_lm_L)] <- 0.001
  scores$Z_lm_K[is.na(scores$Z_lm_K)] <- 0.001

  scores$Rank_L <- rank(scores$Z_lm_L)
  scores$Rank_K <- rank(scores$Z_lm_K)
  scores
}

X1 <- annotate_interaction_scores(X1)
X2 <- annotate_interaction_scores(X2)
# Combined table and overlap classification ----
# Outer join: an OrfRep present in only one table gets NA in the other
# table's columns.
X <- merge(X1, X2, by = "OrfRep", all = TRUE, suffixes = c("_X1", "_X2"))
X$ORF <- X$OrfRep
for (replicate_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_tag, "", X$ORF, fixed = TRUE)
}

# Use OrfRep as the gene name where the gene name is an empty string.
# which() drops NA comparisons (rows missing from one table); an NA in a
# data-frame row index makes the whole assignment fail.
blank_gene_x1 <- which(X$Gene_X1 == "")
if (length(blank_gene_x1) > 0) {
  X$Gene_X1[blank_gene_x1] <- X$OrfRep[blank_gene_x1]
}
blank_gene_x2 <- which(X$Gene_X2 == "")
if (length(blank_gene_x2) > 0) {
  X$Gene_X2[blank_gene_x2] <- X$OrfRep[blank_gene_x2]
}

# Assign `label` to the entries of `labels` where `cond` is TRUE.
# NA conditions and empty selections are ignored. The previous form,
# try(X[cond, ]$col <- label), failed in both cases and try() hid the error,
# so the label was silently never applied.
label_where <- function(labels, cond, label) {
  labels[which(cond)] <- label
  labels
}

# Rules are applied in order and later rules overwrite earlier ones.
# NOTE(review): the "only" rules use inclusive bounds (<= 2 / >= -2), so a
# score of exactly +/-2 in the other table turns "Both" into "only" - confirm
# that this is intended.
zl1 <- X$Z_lm_L_X1
zl2 <- X$Z_lm_L_X2
overlap_l <- rep("No Effect", nrow(X))
overlap_l <- label_where(overlap_l, zl1 >= 2 & zl2 >= 2, "Deletion Enhancer Both")
overlap_l <- label_where(overlap_l, zl1 <= -2 & zl2 <= -2, "Deletion Suppressor Both")
overlap_l <- label_where(overlap_l, zl1 >= 2 & zl2 <= 2, paste0("Deletion Enhancer ", Name1, " only"))
overlap_l <- label_where(overlap_l, zl1 <= -2 & zl2 >= -2, paste0("Deletion Suppressor ", Name1, " only"))
overlap_l <- label_where(overlap_l, zl1 <= 2 & zl2 >= 2, paste0("Deletion Enhancer ", Name2, " only"))
overlap_l <- label_where(overlap_l, zl1 >= -2 & zl2 <= -2, paste0("Deletion Suppressor ", Name2, " only"))
overlap_l <- label_where(overlap_l, zl1 >= 2 & zl2 <= -2, paste0("Deletion Enhancer ", Name1, " Deletion Suppressor ", Name2))
overlap_l <- label_where(overlap_l, zl1 <= -2 & zl2 >= 2, paste0("Deletion Suppressor ", Name1, " Deletion Enhancer ", Name2))
# 0.001 is the sentinel written in place of NA (no growth) Z-scores.
overlap_l <- label_where(overlap_l, zl1 >= 2 & zl2 == 0.001, paste0("Deletion Enhancer ", Name1, " No Growth ", Name2))
overlap_l <- label_where(overlap_l, zl1 <= -2 & zl2 == 0.001, paste0("Deletion Suppressor ", Name1, " No Growth ", Name2))
overlap_l <- label_where(overlap_l, zl2 >= 2 & zl1 == 0.001, paste0("Deletion Enhancer ", Name2, " No Growth ", Name1))
overlap_l <- label_where(overlap_l, zl2 <= -2 & zl1 == 0.001, paste0("Deletion Suppressor ", Name2, " No Growth ", Name1))
X$Overlap <- overlap_l

# Same rules for K, with the enhancer/suppressor directions mirrored.
zk1 <- X$Z_lm_K_X1
zk2 <- X$Z_lm_K_X2
overlap_k <- rep("No Effect", nrow(X))
overlap_k <- label_where(overlap_k, zk1 <= -2 & zk2 <= -2, "Deletion Enhancer Both")
overlap_k <- label_where(overlap_k, zk1 >= 2 & zk2 >= 2, "Deletion Suppressor Both")
overlap_k <- label_where(overlap_k, zk1 <= -2 & zk2 >= -2, paste0("Deletion Enhancer ", Name1, " only"))
overlap_k <- label_where(overlap_k, zk1 >= 2 & zk2 <= 2, paste0("Deletion Suppressor ", Name1, " only"))
overlap_k <- label_where(overlap_k, zk1 >= -2 & zk2 <= -2, paste0("Deletion Enhancer ", Name2, " only"))
overlap_k <- label_where(overlap_k, zk1 <= 2 & zk2 >= 2, paste0("Deletion Suppressor ", Name2, " only"))
overlap_k <- label_where(overlap_k, zk1 <= -2 & zk2 >= 2, paste0("Deletion Enhancer ", Name1, " Deletion Suppressor ", Name2))
overlap_k <- label_where(overlap_k, zk1 >= 2 & zk2 <= -2, paste0("Deletion Suppressor ", Name1, " Deletion Enhancer ", Name2))
overlap_k <- label_where(overlap_k, zk1 <= -2 & zk2 == 0.001, paste0("Deletion Enhancer ", Name1, " No Growth ", Name2))
overlap_k <- label_where(overlap_k, zk1 >= 2 & zk2 == 0.001, paste0("Deletion Suppressor ", Name1, " No Growth ", Name2))
overlap_k <- label_where(overlap_k, zk2 <= -2 & zk1 == 0.001, paste0("Deletion Enhancer ", Name2, " No Growth ", Name1))
overlap_k <- label_where(overlap_k, zk2 >= 2 & zk1 == 0.001, paste0("Deletion Suppressor ", Name2, " No Growth ", Name1))
X$Overlap_K <- overlap_k

# Observed (min, max) of each Z_lm column.
X1_vals <- range(X$Z_lm_L_X1, na.rm = TRUE)
X2_vals <- range(X$Z_lm_L_X2, na.rm = TRUE)
X1_vals_K <- range(X$Z_lm_K_X1, na.rm = TRUE)
X2_vals_K <- range(X$Z_lm_K_X2, na.rm = TRUE)

# Heatmap matrix source: ORF, gene name and the eight Z-score columns.
heatmap_columns <- c(
  "ORF", "Gene_X1",
  "Z_Shift_K_X1", "Z_lm_K_X1", "Z_Shift_K_X2", "Z_lm_K_X2",
  "Z_Shift_L_X1", "Z_lm_L_X1", "Z_Shift_L_X2", "Z_lm_L_X2"
)
X_heatmap <- X[colnames(X) %in% heatmap_columns]
# NOTE(review): this reorder is positional, so it relies on the column order
# of the input files (ORF is appended last by the code above) - confirm
# against the actual input layout.
X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)]
colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
# 12 break points define the 11 colour bins used by the heatmaps.
colormapbreaks <- c(-12, -10, -8, -6, -4, -2, 2, 4, 6, 8, 10, 12)
# Heatmap PDFs, one file per requested GO term ----
# Page height (inches) and row-label size depend on how many ORFs are
# annotated to the requested (parent) GO term. A parent size falls into the
# first tier whose upper limit it does not exceed. This matches the earlier
# copy-pasted range checks, where a size on a shared boundary was plotted
# twice and the smaller tier's file overwrote the larger one.
heatmap_size_limits <- c(30, 60, 100, 200, 500, 1000, 2000, Inf)
heatmap_page_heights <- c(7, 10, 15, 20, 25, 30, 35, 45)
heatmap_row_cex <- c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)
# Child terms with only 1-2 matching genes get a page (without dendrogram)
# only when the parent term has at most this many ORFs.
small_term_parent_limit <- 100

for (s in seq_len(nrow(X3))) {
  GO_ID_Arg_loop <- as.character(X3[s, 1])
  GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)
  # Only make plots if get_descendants() returns at most 100 terms.
  if (length(GOTerm_parent) > 100) {
    next
  }
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  if (Parent_Size < 2) {
    next
  }

  tier <- which(Parent_Size <= heatmap_size_limits)[1]
  plot_small_terms <- Parent_Size <= small_term_parent_limit

  pdf(
    file = paste0(outputpath, X3[s, 2], ".pdf"),
    width = 12, height = heatmap_page_heights[tier], onefile = TRUE
  )
  for (i in seq_along(GOTerm_parent)) {
    GO_Term <- GOTerm_parent[i]
    GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
    GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
    All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
    Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
    # Columns 1-2 are ORF and Gene; the remaining columns are the scores.
    X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])

    genes_in_term <- dim(Genes_Annotated_to_Term)[1]
    if (genes_in_term > 2) {
      dendrogram_mode <- "row"
    } else if (genes_in_term > 0 && plot_small_terms) {
      dendrogram_mode <- "none"
    } else {
      next
    }

    # try(): a term whose heatmap cannot be drawn must not abort the
    # remaining pages of the PDF.
    try(heatmap.2(
      x = X0,
      Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
      dendrogram = dendrogram_mode, cexCol = 0.7,
      cexRow = heatmap_row_cex[tier], scale = "none",
      breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6),
      sepcolor = "white", offsetCol = 0.1,
      ylab = "Gene",
      cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
      keysize = 0.5, trace = "none", density.info = c("none"),
      margins = c(10, 8),
      na.color = "red", col = brewer.pal(11, "PuOr"),
      main = GO_Term_Name,
      labRow = as.character(Genes_Annotated_to_Term$Gene)
    ))
  }
  dev.off()
}

View File

@@ -0,0 +1,239 @@
######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
#Run using Rscript from the command line
#Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv go_terms.tab gene_association.sgd output_directory
#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
#in a Q-HTCP screen
#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
#requires the go_terms.tab and gene_association.sgd
#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
#Output files will include all GO terms, GO terms curated so that only terms with an average score + or - the SD remains above 2 or below -2
#Various other output files will give some statistics on the size of GO terms and the average scores, the SD, etc.
library("stringr")
library("org.Sc.sgd.db")
library("plyr")
# Command-line arguments ----
# Usage:
#   Rscript <this script> <ZScores_Interaction.csv> <go_terms.tab> \
#     <gene_association.sgd> <output_dir/>
Args <- commandArgs(TRUE)
# Fail early: a missing argument would otherwise become NA and surface later
# as a confusing file error (or as a directory literally named "NA").
if (length(Args) < 4) {
  stop(
    "Expected 4 arguments: <ZScores_Interaction.csv> <go_terms.tab> ",
    "<gene_association.sgd> <output_dir>",
    call. = FALSE
  )
}
# Arg 1 is the ZScores_Interaction.csv
input_file <- Args[1]
# Arg 2 is the go_terms.tab
# https://downloads.yeastgenome.org/curation/literature/go_terms.tab
SGD_Terms_file <- Args[2]
# Arg 3 is the gene_association.sgd (the variable name says "features", but
# this is the file that is read into Gene_Association further down)
# https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
SGD_features_file <- Args[3]
# Arg 4 is the directory to put the results into; it is created if it does
# not exist yet.
subDir <- Args[4]
if (!dir.exists(subDir)) {
  dir.create(subDir, recursive = TRUE, showWarnings = FALSE)
}
# The output path is the fourth Rscript argument.
outputpath <- subDir
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
# ---- Load the input tables ----
# X holds the Z score data: ORF in the first column, Gene in the second column,
# and the scores to be averaged in the remaining columns (3:max).
# Keep missing scores as NA (don't use 0.001): 0.001 is needed for clustering
# but would give incorrect average scores here.
# Example input: Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv
X <- read.csv(
  file = input_file,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Normalise the name of the first column so the rest of the script can use X$ORF
if ("OrfRep" == colnames(X)[1]) {
  colnames(X)[1] <- "ORF"
}

# Terms is the GO term list (go_terms.tab has no header row)
# Example input: Documents/Hartman_Lab/SGD_Downloads/go_terms.tab
# (SGD_features.tab is not needed by this script)
Terms <- read.delim(
  file = SGD_Terms_file,
  header = FALSE,
  quote = "",
  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
)

# All ORFs associated with each GO term, as a list keyed by GO ID
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# Gene_Association is the gene-to-GO-term association file; its first 8 lines are skipped
# Example input: Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd
Gene_Association <- read.delim(
  SGD_features_file,
  skip = 8,
  header = FALSE,
  quote = "",
  col.names = c(
    "Database", "Database_Object_ID", "Database_Object_Symbol", "NOT",
    "GO_ID", "Database_Reference", "Evidence", "With_or_From", "Aspect",
    "Database_Object_Name", "Database_Object_Synonym", "Database_Object_Type",
    "taxon", "Date", "Assigned_By", "OtherInfo", "Empty"
  )
)

# ORF name = the text before the first "|" of the synonym field
synonym_fields <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)
Gene_Association$ORF <- synonym_fields[, 1]

# Numeric GO ID = the text after the first ":" of the GO ID, as an integer (used for matching against Terms)
go_id_fields <- str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)
Gene_Association$GO_ID_Numeric <- as.integer(go_id_fields[, 2])

# All unique GO terms present in the association file
GO_Terms <- unique(Gene_Association$GO_ID)
# Column names of the input file
Col_Names_X <- colnames(X)

# Shared starting point for both result tables: the header of X with one
# all-NA row per GO term, and the first two columns renamed to GO_ID and Term
GO_Term_Template <- X[0,]
GO_Term_Template[1:length(GO_Terms),] <- NA
colnames(GO_Term_Template)[1:2] <- c("GO_ID", "Term")

# Averages table: add columns for the ontology, the number of genes used for the
# average, all possible genes in the GO term, and the genes/ORFs used
GO_Term_Averages <- GO_Term_Template
for (extra_col in c("Ontology", "NumGenes", "AllPossibleGenes", "Genes", "ORFs")) {
  GO_Term_Averages[[extra_col]] <- NA
}

# Standard deviation table: same layout, without the extra columns
# (the assignments below are disabled)
GO_Term_SD <- GO_Term_Template
#GO_Term_SD$Ontology <- NA
#GO_Term_SD$NumGenes <- NA
#GO_Term_SD$AllPossibleGenes <- NA
# For every GO term, record its description, gene lists and counts, and the
# mean and SD of each score column (columns 3 onward of X, e.g. the L and K Z scores)
score_columns <- 3:length(X[1,])
for (i in 1:length(GO_Terms)) {
  # current GO term and its character form
  ID <- GO_Terms[i]
  ID_label <- as.character(ID)

  # rows of the gene association file annotated with this GO term
  ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]

  # ORFs listed for this term in GO2ALLORFs
  ID_AllGenes_vector <- as.vector(GO2ALLORFs[ID_label][[1]])
  possible_gene_count <- length(unique(ID_AllGenes_vector))

  # skip terms that cover more than 4000 ORFs
  if (possible_gene_count > 4000) {
    next
  }

  # term description: first row of Terms whose GO_ID matches the numeric GO ID
  matching_terms <- Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]
  GO_Description_Term <- as.character(matching_terms$GO_Term[1])

  # Z scores of all genes in the input that belong to this GO term
  Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
  orfs_used <- unique(Zscores_For_ID$ORF)

  # identification columns, written to both the averages and the SD table
  GO_Term_Averages$GO_ID[i] <- ID_label
  GO_Term_Averages$Term[i] <- GO_Description_Term
  GO_Term_SD$GO_ID[i] <- ID_label
  GO_Term_SD$Term[i] <- GO_Description_Term

  # genes/ORFs that contributed, how many we have, and how many the term has in total
  GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ")
  GO_Term_Averages$ORFs[i] <- paste(orfs_used, collapse = " | ")
  GO_Term_Averages$NumGenes[i] <- length(orfs_used)
  GO_Term_Averages$AllPossibleGenes[i] <- possible_gene_count

  # ontology of the term, taken from the first association row
  GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])

  # mean and SD of every score column, ignoring NA values
  for (j in score_columns) {
    column_scores <- Zscores_For_ID[,j]
    GO_Term_Averages[i,j] <- mean(column_scores, na.rm = TRUE)
    GO_Term_SD[i,j] <- sd(column_scores, na.rm = TRUE)
  }
}
# Tag every column with _Avg or _SD so the two tables can sit side by side
colnames(GO_Term_Averages) <- paste0(colnames(GO_Term_Averages), "_Avg")
colnames(GO_Term_SD) <- paste0(colnames(GO_Term_SD), "_SD")

# One wide table: averages next to SDs, columns in alphabetical order
X2 <- cbind(GO_Term_Averages, GO_Term_SD)
alphabetical_columns <- order(names(X2))
X2 <- X2[, alphabetical_columns]

# Keep only the GO terms that have an average Z_lm_L score
has_L_average <- !is.na(X2$Z_lm_L_Avg)
X2 <- X2[has_L_average,]

# Write the full table
write.csv(
  X2,
  file = paste0(outputpath, "Average_GOTerms_All.csv"),
  row.names = FALSE
)
#remove NAs
X3 <- X2[!is.na(X2$Z_lm_L_Avg),]

# Identify redundant GO terms.
# For every GO term, collect all terms that share its Z_lm_K_Avg value. When that
# whole group was computed from the identical gene list (Genes_Avg), only the first
# row of the group is kept; otherwise all rows of the group are kept. Exact duplicate
# rows produced by visiting each group several times are dropped by unique() below.
Y_parts <- vector("list", nrow(X3))
for (i in seq_len(nrow(X3))) {
  # the term being examined
  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
  X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,]
  # rows with the same K average; compare against the Z_lm_K_Avg column only
  # (matching against the whole row could also hit values from unrelated columns)
  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg,]
  if (nrow(X3_Temp2) > 1 && length(unique(X3_Temp2$Genes_Avg)) == 1) {
    X3_Temp2 <- X3_Temp2[1,]
  }
  Y_parts[[i]] <- X3_Temp2
}
# Bind once after the loop instead of growing Y row by row; an empty X3 yields an empty Y
if (length(Y_parts) > 0) {
  Y <- do.call(rbind, Y_parts)
} else {
  Y <- X3
}
Y1 <- unique(Y)
write.csv(Y1,file=paste(outputpath,"Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
# Small helper: write a result table into the output location without row names
write_go_table <- function(go_table, file_name) {
  write.csv(go_table, file = paste0(outputpath, file_name), row.names = FALSE)
}

# Non-redundant terms whose average Z_lm_L score is >= 2 or <= -2
L_outside_2 <- Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2
Y2 <- Y1[L_outside_2,]
Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
write_go_table(Y2, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv")

# ...restricted to terms averaged over more than 2 genes
Y3 <- Y2[Y2$NumGenes_Avg > 2,]
write_go_table(Y3, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv")

# Non-redundant terms whose average Z_lm_K score is >= 2 or <= -2
K_outside_2 <- Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2
Y4 <- Y1[K_outside_2,]
Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
write_go_table(Y4, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv")

# ...restricted to terms averaged over more than 2 genes
Y5 <- Y4[Y4$NumGenes_Avg > 2,]
write_go_table(Y5, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv")

View File

@@ -0,0 +1,239 @@
######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
#Run using Rscript from the command line
#Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv go_terms.tab gene_association.sgd output_directory
#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
#in a Q-HTCP screen
#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
#requires the go_terms.tab and gene_association.sgd
#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
#Output files will include all GO terms, plus non-redundant GO terms filtered so that only terms with an average Z score at or above 2 or at or below -2 remain
#Various other output files will give some statistics on the size of GO terms and the average scores, the SD, etc.
library("stringr")
library("org.Sc.sgd.db")
library("plyr")
#build in command args to apply this code to a given !!results sheet
# Read the four positional command-line arguments:
#   1. the ZScores_Interaction.csv
#   2. go_terms.tab
#      https://downloads.yeastgenome.org/curation/literature/go_terms.tab
#   3. gene_association.sgd
#      https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
#   4. the directory to put the results into (created if needed)
Args <- commandArgs(TRUE)
if (length(Args) < 4) {
  stop("Expected 4 arguments: <ZScores_Interaction.csv> <go_terms.tab> <gene_association.sgd> <output_directory>", call. = FALSE)
}
input_file <- Args[1]
SGD_Terms_file <- Args[2]
SGD_features_file <- Args[3]
subDir <- Args[4]
# Create the output directory when it is missing; recursive = TRUE lets nested paths be created in one call
if (nzchar(subDir) && !dir.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}
# define the output path (fourth argument from Rscript)
# Output file names are appended to outputpath with paste(..., sep = ""), so make sure
# a non-empty path ends in a separator; otherwise files land beside the directory, not in it.
outputpath <- subDir
if (nzchar(outputpath) && !grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
# ---- Load the input tables ----
# X holds the Z score data: ORF in the first column, Gene in the second column,
# and the scores to be averaged in the remaining columns (3:max).
# Keep missing scores as NA (don't use 0.001): 0.001 is needed for clustering
# but would give incorrect average scores here.
# Example input: Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv
X <- read.csv(
  file = input_file,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Normalise the name of the first column so the rest of the script can use X$ORF
if ("OrfRep" == colnames(X)[1]) {
  colnames(X)[1] <- "ORF"
}

# Terms is the GO term list (go_terms.tab has no header row)
# Example input: Documents/Hartman_Lab/SGD_Downloads/go_terms.tab
# (SGD_features.tab is not needed by this script)
Terms <- read.delim(
  file = SGD_Terms_file,
  header = FALSE,
  quote = "",
  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
)

# All ORFs associated with each GO term, as a list keyed by GO ID
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# Gene_Association is the gene-to-GO-term association file; its first 8 lines are skipped
# Example input: Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd
Gene_Association <- read.delim(
  SGD_features_file,
  skip = 8,
  header = FALSE,
  quote = "",
  col.names = c(
    "Database", "Database_Object_ID", "Database_Object_Symbol", "NOT",
    "GO_ID", "Database_Reference", "Evidence", "With_or_From", "Aspect",
    "Database_Object_Name", "Database_Object_Synonym", "Database_Object_Type",
    "taxon", "Date", "Assigned_By", "OtherInfo", "Empty"
  )
)

# ORF name = the text before the first "|" of the synonym field
synonym_fields <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)
Gene_Association$ORF <- synonym_fields[, 1]

# Numeric GO ID = the text after the first ":" of the GO ID, as an integer (used for matching against Terms)
go_id_fields <- str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)
Gene_Association$GO_ID_Numeric <- as.integer(go_id_fields[, 2])

# All unique GO terms present in the association file
GO_Terms <- unique(Gene_Association$GO_ID)
# Column names of the input file
Col_Names_X <- colnames(X)

# Shared starting point for both result tables: the header of X with one
# all-NA row per GO term, and the first two columns renamed to GO_ID and Term
GO_Term_Template <- X[0,]
GO_Term_Template[1:length(GO_Terms),] <- NA
colnames(GO_Term_Template)[1:2] <- c("GO_ID", "Term")

# Averages table: add columns for the ontology, the number of genes used for the
# average, all possible genes in the GO term, and the genes/ORFs used
GO_Term_Averages <- GO_Term_Template
for (extra_col in c("Ontology", "NumGenes", "AllPossibleGenes", "Genes", "ORFs")) {
  GO_Term_Averages[[extra_col]] <- NA
}

# Standard deviation table: same layout, without the extra columns
# (the assignments below are disabled)
GO_Term_SD <- GO_Term_Template
#GO_Term_SD$Ontology <- NA
#GO_Term_SD$NumGenes <- NA
#GO_Term_SD$AllPossibleGenes <- NA
# For every GO term, record its description, gene lists and counts, and the
# mean and SD of each score column (columns 3 onward of X, e.g. the L and K Z scores)
score_columns <- 3:length(X[1,])
for (i in 1:length(GO_Terms)) {
  # current GO term and its character form
  ID <- GO_Terms[i]
  ID_label <- as.character(ID)

  # rows of the gene association file annotated with this GO term
  ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]

  # ORFs listed for this term in GO2ALLORFs
  ID_AllGenes_vector <- as.vector(GO2ALLORFs[ID_label][[1]])
  possible_gene_count <- length(unique(ID_AllGenes_vector))

  # skip terms that cover more than 4000 ORFs
  if (possible_gene_count > 4000) {
    next
  }

  # term description: first row of Terms whose GO_ID matches the numeric GO ID
  matching_terms <- Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]
  GO_Description_Term <- as.character(matching_terms$GO_Term[1])

  # Z scores of all genes in the input that belong to this GO term
  Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
  orfs_used <- unique(Zscores_For_ID$ORF)

  # identification columns, written to both the averages and the SD table
  GO_Term_Averages$GO_ID[i] <- ID_label
  GO_Term_Averages$Term[i] <- GO_Description_Term
  GO_Term_SD$GO_ID[i] <- ID_label
  GO_Term_SD$Term[i] <- GO_Description_Term

  # genes/ORFs that contributed, how many we have, and how many the term has in total
  GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ")
  GO_Term_Averages$ORFs[i] <- paste(orfs_used, collapse = " | ")
  GO_Term_Averages$NumGenes[i] <- length(orfs_used)
  GO_Term_Averages$AllPossibleGenes[i] <- possible_gene_count

  # ontology of the term, taken from the first association row
  GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])

  # mean and SD of every score column, ignoring NA values
  for (j in score_columns) {
    column_scores <- Zscores_For_ID[,j]
    GO_Term_Averages[i,j] <- mean(column_scores, na.rm = TRUE)
    GO_Term_SD[i,j] <- sd(column_scores, na.rm = TRUE)
  }
}
# Tag every column with _Avg or _SD so the two tables can sit side by side
colnames(GO_Term_Averages) <- paste0(colnames(GO_Term_Averages), "_Avg")
colnames(GO_Term_SD) <- paste0(colnames(GO_Term_SD), "_SD")

# One wide table: averages next to SDs, columns in alphabetical order
X2 <- cbind(GO_Term_Averages, GO_Term_SD)
alphabetical_columns <- order(names(X2))
X2 <- X2[, alphabetical_columns]

# Keep only the GO terms that have an average Z_lm_L score
has_L_average <- !is.na(X2$Z_lm_L_Avg)
X2 <- X2[has_L_average,]

# Write the full table
write.csv(
  X2,
  file = paste0(outputpath, "Average_GOTerms_All.csv"),
  row.names = FALSE
)
#remove NAs
X3 <- X2[!is.na(X2$Z_lm_L_Avg),]

# Identify redundant GO terms.
# For every GO term, collect all terms that share its Z_lm_K_Avg value. When that
# whole group was computed from the identical gene list (Genes_Avg), only the first
# row of the group is kept; otherwise all rows of the group are kept. Exact duplicate
# rows produced by visiting each group several times are dropped by unique() below.
Y_parts <- vector("list", nrow(X3))
for (i in seq_len(nrow(X3))) {
  # the term being examined
  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
  X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,]
  # rows with the same K average; compare against the Z_lm_K_Avg column only
  # (matching against the whole row could also hit values from unrelated columns)
  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg,]
  if (nrow(X3_Temp2) > 1 && length(unique(X3_Temp2$Genes_Avg)) == 1) {
    X3_Temp2 <- X3_Temp2[1,]
  }
  Y_parts[[i]] <- X3_Temp2
}
# Bind once after the loop instead of growing Y row by row; an empty X3 yields an empty Y
if (length(Y_parts) > 0) {
  Y <- do.call(rbind, Y_parts)
} else {
  Y <- X3
}
Y1 <- unique(Y)
write.csv(Y1,file=paste(outputpath,"Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
# Small helper: write a result table into the output location without row names
write_go_table <- function(go_table, file_name) {
  write.csv(go_table, file = paste0(outputpath, file_name), row.names = FALSE)
}

# Non-redundant terms whose average Z_lm_L score is >= 2 or <= -2
L_outside_2 <- Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2
Y2 <- Y1[L_outside_2,]
Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
write_go_table(Y2, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv")

# ...restricted to terms averaged over more than 2 genes
Y3 <- Y2[Y2$NumGenes_Avg > 2,]
write_go_table(Y3, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv")

# Non-redundant terms whose average Z_lm_K score is >= 2 or <= -2
K_outside_2 <- Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2
Y4 <- Y1[K_outside_2,]
Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
write_go_table(Y4, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv")

# ...restricted to terms averaged over more than 2 genes
Y5 <- Y4[Y4$NumGenes_Avg > 2,]
write_go_table(Y5, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv")