Commit earlier refactoring
This commit is contained in:
@@ -0,0 +1,296 @@
|
||||
timestamp()

# Version 10: changed the way clusters are found. Cluster 1-0-1 used to pick up
# genes that actually belong to 1-0-11.

Args <- commandArgs(TRUE)
if (length(Args) < 2) {
  stop("Usage: Rscript <script> <finalTable.csv> <output_dir>", call. = FALSE)
}

# Arg 1: the "finalTable.csv" file obtained after running REMc
# (original note: REMc generated by eclipse).
input_finalTable <- Args[1]

# Arg 2: folder that receives the heatmap PDFs. It is created when it does not
# exist yet, so a manual `mkdir` beforehand is no longer required.
subDir <- Args[2]
if (!dir.exists(subDir) && !dir.create(subDir, recursive = TRUE)) {
  stop("Could not create output directory: ", subDir, call. = FALSE)
}

# Output path for the heatmaps
outputpath <- subDir
|
||||
|
||||
library(RColorBrewer)
|
||||
library(gplots)
|
||||
|
||||
# Load the final table; text columns are kept as character, not factor.
hmapfile <- read.csv(
  file = input_finalTable,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Placeholder values that stand in for missing data become real NAs.
for (placeholder in c(-100, 100, 0.001, -0.001)) {
  hmapfile[hmapfile == placeholder] <- NA
}
|
||||
|
||||
|
||||
# Number of rows, i.e. number of genes in the table
num_total_genes <- nrow(hmapfile)

# Break the cluster names apart so each level of the cluster origin can be
# accessed on its own.
# Spaces are deliberately NOT stripped from cluster.origin: a space is inserted
# in front of every ";" so each name that is followed by ";" ends in a blank.
# Without it, grepl() adds too many genes to a cluster once the numbering goes
# past 1-0-10, because it cannot tell 1-0-1 from 1-0-10.
hmapfile$cluster.origin <- gsub(";", " ;", x = hmapfile$cluster.origin)
hmapfile$cluster.origin <- strsplit(hmapfile$cluster.origin, ";")
# use tail(x, n) for accessing the outermost cluster of a gene

# Largest number of cluster levels recorded for any gene
clust_rounds <- max(0L, lengths(hmapfile$cluster.origin))

# Distinct cluster-origin entries, minus entries that are just a blank
unique_clusts <- unique(hmapfile$cluster.origin)
unique_clusts <- unique_clusts[unique_clusts != " "]

# Flatten to the sorted set of unique cluster names
unique_clusts <- sort(unique(unlist(unique_clusts, use.names = FALSE)), decreasing = FALSE)
num_unique_clusts <- length(unique_clusts)
|
||||
|
||||
# Color key.
# heatmap.2 needs "breaks" to set the color key: 12 breaks for 11 colors.
#
# Alternatives that were tried and are currently disabled:
#   * z-scaling the value columns:
#       hmapfile[, 4:(ncol - 2)] <- scale(hmapfile[, 4:(ncol - 2)])
#   * data-driven limits:
#       L_MAX <- quantile(hmapfile[, LcolumnValues], c(0, .01, .5, .99, 1), na.rm = TRUE)[4]
#       K_MIN <- quantile(hmapfile[, KcolumnValues], c(0, .01, .5, .99, 1), na.rm = TRUE)[2]
#   * putting L on the K scale:
#       L_Multiplier <- as.numeric(abs(K_MIN / L_MAX))
#       hmapfile[, LcolumnValues] <- hmapfile[, LcolumnValues] * L_Multiplier
#       KEY_MAX <- as.numeric(L_MAX * L_Multiplier)

# Indices of the K and L z-score columns among the value columns
# (column 4 up to the third-from-last column).
value_cols <- 4:(ncol(hmapfile) - 2)
value_names <- colnames(hmapfile)[value_cols]
KcolumnValues <- value_cols[grepl("_Z_lm_K", value_names, fixed = TRUE)]
LcolumnValues <- value_cols[grepl("_Z_lm_L", value_names, fixed = TRUE)]

# Fixed limits for the key
L_MAX <- 12
K_MIN <- -12

KEY_MAX <- as.numeric(L_MAX)
KEY_MIN <- as.numeric(K_MIN)

print(KEY_MIN)
print(L_MAX)

# Six evenly spaced breaks from KEY_MIN up to KEY_MIN/6, then six from
# KEY_MAX/6 up to KEY_MAX. The middle color bin therefore spans
# KEY_MIN/6 .. KEY_MAX/6.
colormapbreaks <- c(KEY_MIN * ((6:1) / 6), KEY_MAX * ((1:6) / 6))
|
||||
#print(colormapbreaks)
|
||||
|
||||
# Design notes kept from the original author:
#  - Shift detection only inspects the first value column (column 4); it could
#    grepl() over all column names instead.
#  - One could collect every column whose name contains "Shift" and place the
#    separators only between interaction values, or drop redundant shift
#    columns, or pool/average the shift data.

# even_columns is later handed to heatmap.2 as `colsep`.
# With Shift columns present every second position is used, otherwise every one.
first_value_is_shift <- grepl("Shift", colnames(hmapfile)[4], fixed = TRUE)
last_sep <- length(hmapfile[1, ]) - 7

if (first_value_is_shift) {
  even_columns <- seq(from = 2, to = last_sep, by = 2)
} else {
  even_columns <- seq(from = 2, to = last_sep, by = 1)
  print("NO SHIFT VALS FOUND")
}
|
||||
|
||||
# Script-specific override that was used for one data set (rap tem hu script):
# even_columns <- c(2, 5, 7, 10, 12, 15, 17)

# Column labels for the heatmap, one per value column.
colnames_edit <- as.character(colnames(hmapfile)[4:(ncol(hmapfile) - 2)])

# Every "Shift" column gets an empty label; the column right after a Shift
# column gets a readable label ("_Z_lm_" and remaining underscores -> blanks).
is_shift <- grepl("Shift", colnames_edit, fixed = TRUE)

# Positions directly after a Shift column. Positions past the end are dropped
# so that a trailing Shift column cannot append an extra NA label.
after_shift <- which(is_shift) + 1
after_shift <- after_shift[after_shift <= length(colnames_edit)]

colnames_edit[after_shift] <- gsub(pattern = "_Z_lm_", replacement = " ", x = colnames_edit[after_shift])
colnames_edit[after_shift] <- gsub(pattern = "_", replacement = " ", x = colnames_edit[after_shift])
colnames_edit[is_shift] <- ""

print(colnames_edit)

# Labels can still be overridden by hand afterwards, e.g.
# colnames_edit[5] <- "TEM HLEG K"
# colnames_edit[10] <- "TEM HL K"
# colnames_edit[15] <- "TEM HLEG L"
# colnames_edit[20] <- "TEM HL L"
|
||||
#break()
|
||||
#colnames_edit[5] <- "TEM HLEG K"
|
||||
#colnames_edit[10] <- "TEM HL K"
|
||||
#colnames_edit[15] <- "TEM HLEG L"
|
||||
#colnames_edit[20] <- "TEM HL L"
|
||||
|
||||
|
||||
# Create the heatmaps: one PDF per cluster, written to `outputpath`.

# Page size and text scaling per cluster size. A row applies to clusters with
# at least `min_rows` genes and fewer than the next row's `min_rows`.
# note_cex = NA means the rounded cell values are not printed.
heatmap_layouts <- data.frame(
  min_rows   = c(0,   30,  60,  101, 150, 201, 2001),
  pdf_height = c(7,   9,   12,  12,  12,  15,  20),
  pdf_width  = c(12,  12,  12,  12,  12,  12,  15),
  cex_row    = c(0.9, 0.6, 0.4, 0.2, 0.1, 0.1, 0.1),
  note_cex   = c(0.4, 0.4, 0.3, 0.3, 0.2, 0.1, NA),
  key_size   = c(1,   1,   1,   1,   1,   0.7, 0.7)
)

# Draw a single cluster heatmap into `pdf_path`.
#   values      numeric matrix, genes in rows
#   row_labels  gene labels for the rows
#   title       plot title (the cluster name)
#   pdf_path    file to write
#   style       one row of `heatmap_layouts`
# Reads colormapbreaks, even_columns and colnames_edit from the script scope.
plot_cluster_heatmap <- function(values, row_labels, title, pdf_path, style) {
  pdf(file = pdf_path, height = style$pdf_height, width = style$pdf_width)
  # Close the device even when heatmap.2 fails, so no half-open PDF is left.
  on.exit(dev.off(), add = TRUE)

  draw <- function(...) {
    heatmap.2(
      x = values,
      Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
      dendrogram = "row", cexCol = 0.8, cexRow = style$cex_row, scale = "none",
      breaks = colormapbreaks, symbreaks = FALSE,
      colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
      xlab = "Type of Media", ylab = "Gene Name",
      key = TRUE, keysize = style$key_size, trace = "none",
      density.info = "none", margins = c(10, 8),
      na.color = "red", col = brewer.pal(11, "PuOr"),
      main = title,
      labRow = row_labels, labCol = colnames_edit,
      ...
    )
  }

  if (is.na(style$note_cex)) {
    draw()
  } else {
    draw(cellnote = round(values, digits = 0), notecex = style$note_cex)
  }
  invisible(pdf_path)
}

heatmap_value_cols <- 4:(ncol(hmapfile) - 2)

for (cluster in unique_clusts) {
  # Genes whose cluster origin matches this cluster name
  cluster_data <- hmapfile[grepl(cluster, hmapfile$cluster.origin), ]
  cluster_length <- nrow(cluster_data)

  # Row clustering needs at least two genes
  if (cluster_length < 2) {
    next
  }

  X0 <- as.matrix(cluster_data[, heatmap_value_cols])
  style <- heatmap_layouts[findInterval(cluster_length, heatmap_layouts$min_rows), ]
  mypath <- file.path(outputpath, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))

  plot_cluster_heatmap(
    values = X0,
    row_labels = as.character(cluster_data$Gene),
    title = cluster,
    pdf_path = mypath,
    style = style
  )
  # print(paste("FINISHED", "CLUSTER", cluster, sep = " "))
}
|
||||
|
||||
|
||||
timestamp()
|
||||
|
||||
39
workflow/.old/apps/r/SSscripts/22_0603_Remy_Exclude_DAmPs.R
Normal file
39
workflow/.old/apps/r/SSscripts/22_0603_Remy_Exclude_DAmPs.R
Normal file
@@ -0,0 +1,39 @@
|
||||
# This version of Exclude_DAmPs was modified by Remy. It
# assumes that underscores included in OrfRep names are
# already in the DAmPList, because in some cases an OrfRep
# may have _1 being a non-DAmP and _2 being a DAmP. It is
# not a general rule so it is better just to use a list
# generated from the Master Plate sheet directly rather
# than trying to deal with underscores in this script.

# Command-line arguments let this code be applied to any results sheet.
Args <- commandArgs(TRUE)
if (length(Args) < 3) {
  stop("Usage: Rscript <script> <input.csv> <DAmP_list> <output.csv>", call. = FALSE)
}

# Arg 1 is the input file (ZScores_Interaction.csv, REMcReady.csv, or another
# file with an OrfRep column) for the genome-wide YKO/YKD experiment.
input_file1 <- Args[1]

# Arg 2 is the DAmP list: the genes to remove, one per line, no header.
DAmPs_List <- Args[2]

# Arg 3 is the output file.
output_file <- Args[3]

X <- read.csv(file = input_file1, stringsAsFactors = FALSE)
Damps <- read.delim(DAmPs_List, header = FALSE, stringsAsFactors = FALSE)

if (!("OrfRep" %in% colnames(X))) {
  stop("Input file has no 'OrfRep' column: ", input_file1, call. = FALSE)
}

# OrfRep values are matched against the DAmP list as they are; the _1.._4
# suffixes are intentionally not stripped (see header note). Filtering on
# OrfRep directly avoids adding and then removing a temporary ORF column.
X <- X[!(X$OrfRep %in% Damps$V1), , drop = FALSE]

write.csv(X, file = output_file, row.names = FALSE)
|
||||
@@ -0,0 +1,374 @@
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
|
||||
# Use this Rscript to compare two results sheets from the GTF analysis
# ("Average_GOTerms_All.csv").
#   Arg1  Average_GOTerms_All.csv of GTF results 1
#   Arg2  name to give GTF results 1 in the output
#   Arg3  Average_GOTerms_All.csv of GTF results 2
#   Arg4  name to give GTF results 2 in the output
#   Arg5  directory to put the result files into (created if needed)

Args <- commandArgs(TRUE)
if (length(Args) < 5) {
  stop(
    "Usage: Rscript <script> <GTF_1.csv> <name_1> <GTF_2.csv> <name_2> <output_dir>",
    call. = FALSE
  )
}

# Arg 1 is GTF results 1
input_file1 <- Args[1]

# Arg 2 is the name of GTF results 1 to print in the results
Name1 <- Args[2]

# Arg 3 is GTF results 2
input_file2 <- Args[3]

# Arg 4 is the name of GTF results 2 to print in the results
Name2 <- Args[4]

# Arg 5 is the directory to put the results into
subDir <- Args[5]
if (!dir.exists(subDir) && !dir.create(subDir, recursive = TRUE)) {
  stop("Could not create output directory: ", subDir, call. = FALSE)
}

# Output path (fifth argument). File names are appended to it with paste(),
# so it is normalised to end in exactly one "/"; without that the files would
# be written next to the directory instead of into it.
outputpath <- sub("/*$", "/", subDir)
|
||||
|
||||
|
||||
|
||||
|
||||
# Theme elements for plots.
# theme_Publication: theme_foundation() plus bold centred title, black axis
# lines, light major grid, no minor grid, and a horizontal legend at the bottom.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  light_grey <- "#f0f0f0"

  foundation + theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )
}
|
||||
|
||||
# Discrete fill scale using the nine-color "Publication" palette.
# Further arguments are forwarded to discrete_scale().
# manual_pal() is called through the scales namespace, so the function no
# longer has to attach the package as a side effect.
scale_fill_Publication <- function(...){
  publication_colors <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colors), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-color "Publication" palette.
# Further arguments are forwarded to discrete_scale().
# manual_pal() is called through the scales namespace, so it resolves even
# when scales has not been attached.
scale_colour_Publication <- function(...){
  publication_colors <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colors), ...)
}
|
||||
|
||||
|
||||
# Same look as theme_Publication, but with a vertical legend on the right and
# larger legend keys (0.5 cm).
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
  light_grey <- "#f0f0f0"

  foundation + theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )
}
|
||||
|
||||
# Discrete fill scale using the nine-color "Publication" palette.
# NOTE: this repeats the definition of the same name earlier in this script
# and replaces it when the script runs.
# manual_pal() is called through the scales namespace, so it resolves even
# when scales has not been attached.
scale_fill_Publication <- function(...){
  publication_colors <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colors), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-color "Publication" palette.
# NOTE: this repeats the definition of the same name earlier in this script
# and replaces it when the script runs.
# manual_pal() is called through the scales namespace, so it resolves even
# when scales has not been attached.
scale_colour_Publication <- function(...){
  publication_colors <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colors), ...)
}
|
||||
|
||||
# Read both GTF result sheets (paths come from the command line).
# For interactive use the inputs can be set by hand instead, e.g.
#   outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
#   Name1 <- "DOXO_HLD"
#   Name2 <- "DOXO_HLEG"
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)

# Full outer join on the GO term; columns get the suffix _X1 / _X2.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Scatter plot colored by ontology. The extra aesthetics (Term, Genes, ...)
# are not drawn; they are carried along into the ggplotly() conversion.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html"
)

gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Ontology_Avg_X1,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"),
  width = 12, height = 8
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Identify aggravators (average Z >= 2) and alleviators (average Z <= -2),
# regardless of whether they still meet the threshold once the SD is considered.
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ]

# Label every term with its group. Groups are applied in this order and a
# later group overwrites an earlier one (e.g. the opposite-direction groups at
# the end replace the "specific" labels set first).
# Assigning through a logical index is a no-op for an empty group, so no
# try() wrapper is needed and real errors are no longer hidden.
X$Overlap_Avg <- NA

overlap_groups_avg <- list(
  list(rows = X1_Specific_Aggravators, label = paste(Name1, "Specific_Deletion_Suppressors", sep = "_")),
  list(rows = X1_Specific_Alleviators, label = paste(Name1, "Specific_Deletion_Enhancers", sep = "_")),
  list(rows = X2_Specific_Aggravators, label = paste(Name2, "Specific_Deletion_Suppressors", sep = "_")),
  list(rows = X2_Specific_Alleviators, label = paste(Name2, "Specific_Deletion_Enhancers", sep = "_")),
  list(rows = Overlap_Aggravators, label = "Overlapping_Deletion_Suppressors"),
  list(rows = Overlap_Alleviators, label = "Overlapping_Deletion_Enhancers"),
  list(
    rows = X2_Specific_Aggravators_X1_Alleviatiors,
    label = paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
  ),
  list(
    rows = X2_Specific_Alleviators_X1_Aggravators,
    label = paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
  )
)

for (grp in overlap_groups_avg) {
  X$Overlap_Avg[X$Term_Avg %in% grp$rows$Term_Avg] <- grp$label
}
|
||||
|
||||
|
||||
# Scatter plot of all terms, colored by the average-based overlap group.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"
)

gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf"),
  width = 12, height = 8
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
# Keep only terms that have at least 2 genes in both result sets.
# which() drops rows where either count is NA (terms present in only one
# sheet after the outer merge); a bare logical index would turn those into
# all-NA rows.
x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2), ]

plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"
)

gg <- ggplot(
  data = x_rem2_gene,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf"),
  width = 12, height = 8
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Scatter plot restricted to terms that received an average-based overlap
# label (Overlap_Avg is not NA).
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_byOverlap.html"
)

X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]

gg <- ggplot(
  data = X_overlap_nothresold,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf"),
  width = 12, height = 8
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Only label GTF terms whose average score is still beyond +/-2 after the SD
# has been taken off in the unfavourable direction.
# Z1 identifies aggravators (average - SD), Z2 alleviators (average + SD).
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2

# NOTE: the Z2 columns keep the "L_Subtract_SD" name although the SD is added.
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2

X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
# Z1 and Z2 have the same rows in the same order as X, so their columns can be
# combined in one condition.
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]

# Label every term with its group; later groups overwrite earlier ones.
# Assigning through a logical index is a no-op for an empty group, so no
# try() wrapper is needed and real errors are no longer hidden.
X$Overlap <- NA

overlap_groups_sd <- list(
  list(rows = X1_Specific_Aggravators2, label = paste(Name1, "Specific_Deletion_Suppressors", sep = "_")),
  list(rows = X1_Specific_Alleviators2, label = paste(Name1, "Specific_Deletion_Enhancers", sep = "_")),
  list(rows = X2_Specific_Aggravators2, label = paste(Name2, "Specific_Deletion_Suppressors", sep = "_")),
  list(rows = X2_Specific_Alleviators2, label = paste(Name2, "Specific_Deletion_Enhancers", sep = "_")),
  list(rows = Overlap_Aggravators2, label = "Overlapping_Deletion_Suppressors"),
  list(rows = Overlap_Alleviators2, label = "Overlapping_Deletion_Enhancers"),
  list(
    rows = X2_Specific_Aggravators2_X1_Alleviatiors2,
    label = paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
  ),
  list(
    rows = X2_Specific_Alleviators2_X1_Aggravators2,
    label = paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
  )
)

for (grp in overlap_groups_sd) {
  X$Overlap[X$Term_Avg %in% grp$rows$Term_Avg] <- grp$label
}
|
||||
|
||||
|
||||
# Scatter plot restricted to terms that received an SD-adjusted overlap label
# (Overlap is not NA).
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"
)

X_abovethreshold <- X[!(is.na(X$Overlap)), ]

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf"),
  width = 12, height = 8
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Same above-threshold scatter plot, with each point labelled by its GO term.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"
)

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf"),
  width = 20, height = 20
)
# Explicit print(): top-level autoprint does not happen under source().
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Rank the above-threshold terms by descending average Z in each data set
# (rank 1 = largest value). Ties are broken at random, so tied terms may be
# numbered differently from run to run.
# The ranks are assigned directly: pre-filling the column with a scalar NA
# fails on a zero-row data frame and was overwritten immediately anyway.
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2, ties.method = "random")
|
||||
|
||||
|
||||
# Above-threshold scatter with every point labelled by its X1 rank.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf"), width = 15, height = 15)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Above-threshold scatter with every point labelled by its X2 rank.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_K_Avg_X1, y = Z_lm_K_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_K_SD_X1, SD_2 = Z_lm_K_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf"), width = 15, height = 15)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Export the full merged table, then the above-threshold subset, as CSV
# files without row names.
write.csv(
  x = X,
  file = paste0(outputpath, "All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"),
  row.names = FALSE
)
write.csv(
  x = X_abovethreshold,
  file = paste0(outputpath, "AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"),
  row.names = FALSE
)
|
||||
@@ -0,0 +1,374 @@
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
|
||||
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
|
||||
#Arg1 is Average_GOTerms_All_1.csv
|
||||
#Arg2 is the name to give GTF results 1
|
||||
#Arg3 is Average_GOTerms_All2.csv
|
||||
#Arg4 is the name to give GTF results 2
|
||||
#Arg5 is the directory to put the files into
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
# Read the five positional command-line arguments and prepare the output
# directory.
Args <- commandArgs(TRUE)

# Fail early with a usage hint; a missing argument would otherwise become NA
# and only fail later, far from the cause.
if (length(Args) < 5) {
  stop("Expected 5 arguments: <GTF results 1 csv> <name 1> <GTF results 2 csv> <name 2> <output directory>",
       call. = FALSE)
}

#Arg 1 is the GTF results 1
input_file1 <- Args[1]

#Arg 2 is the name of GTF results 1 to print in the results
Name1 <- Args[2]

#Arg 3 is the GTF results 2
input_file2 <- Args[3]

#Arg 4 is the name of GTF results 2 to print in the results
Name2 <- Args[4]

#Arg 5 is the directory to put the results into (created if needed)
subDir <- Args[5]

if (!file.exists(subDir)) {
  # recursive = TRUE so a nested output directory can be created in one call
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output path (fifth Rscript argument). The rest of the script builds file
# names by pasting them directly onto outputpath, so make sure it ends in a
# path separator; without one the files land beside the directory instead of
# inside it.
outputpath <- subDir
if (!grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
|
||||
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# Plot theme with the legend laid out horizontally below the panel.
#
# base_size:   base font size passed on to theme_foundation()
# base_family: base font family passed on to theme_foundation()
#
# Returns theme_foundation() with the overrides below added on top.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    # titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # borderless backgrounds
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # legend along the bottom
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale built from a fixed nine-colour palette.
#
# ...: further arguments passed on to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified instead of calling library(scales)
  # inside the function, which attached the package as a hidden side effect.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale built from a fixed nine-colour palette.
#
# ...: further arguments passed on to discrete_scale().
scale_colour_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
|
||||
# Plot theme with the legend stacked vertically to the right of the panel.
#
# base_size:   base font size passed on to theme_foundation()
# base_family: base font family passed on to theme_foundation()
#
# Returns theme_foundation() with the overrides below added on top.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    # titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # borderless backgrounds
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # legend on the right-hand side
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale built from a fixed nine-colour palette.
# NOTE(review): this redefines a function of the same name declared earlier
# in the script; this later definition is the one in effect afterwards.
#
# ...: further arguments passed on to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale built from a fixed nine-colour palette.
# NOTE(review): this redefines a function of the same name declared earlier
# in the script; this later definition is the one in effect afterwards.
#
# ...: further arguments passed on to discrete_scale().
scale_colour_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Load the two GTF result sheets given on the command line.
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)

#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)

X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)

#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)

#Name1 <- "DOXO_HLD"
#Name2 <- "DOXO_HLEG"

plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")

# Outer join on the GO term: terms found in only one sheet are kept, with NA
# in the other sheet's columns. Shared column names get _X1 / _X2 suffixes.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
# Classify each GO term from its average Z scores alone, using |Z| >= 2 as
# the cut-off. which() drops rows where a comparison is NA.
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]

# Label the terms. Order matters: the "specific" sets also contain terms that
# act in opposite directions in the two data sets, and the last two
# assignments relabel those.
# The column is indexed directly, which is simply a no-op for an empty set;
# the previous try(X[rows, ]$col <- ...) form errored on empty sets and hid
# every other error as well.
X$Overlap_Avg <- NA_character_
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
# Spelling fixed ("Suppresors" -> "Suppressors") to match the Name2 label.
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
|
||||
|
||||
|
||||
# Scatter of every GO term, coloured by its average-score overlap class.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
# Restrict to GO terms backed by at least two genes in both data sets.
# which() is needed because a logical index containing NA would add all-NA
# rows to the subset instead of dropping them.
x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2), ]
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = x_rem2_gene,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Scatter of only those GO terms that received an average-score overlap
# label, i.e. Overlap_Avg is not NA.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_byOverlap.html")

X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_overlap_nothresold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
#only output GTF terms where average score is still above 2 after subtracting the SD
|
||||
#Z1 will ID aggravators, Z2 alleviators
|
||||
# Z1 holds average minus SD (used for the >= 2 tests), Z2 holds average plus
# SD (used for the <= -2 tests). Both are copies of X, so their rows line up
# and the two can be combined in a single logical index further down.
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2

# The Z2 columns keep the "Subtract" name although the SD is added here.
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z2$Z_lm_L_Avg_X1 + Z2$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z2$Z_lm_L_Avg_X2 + Z2$Z_lm_L_SD_X2

# which() drops rows where a comparison is NA.
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]

# Label the terms; later assignments overwrite earlier ones for terms that
# fall in more than one set.
# The column is indexed directly, which is simply a no-op for an empty set;
# the previous try(X[rows, ]$col <- ...) form errored on empty sets and hid
# every other error as well.
X$Overlap <- NA_character_
X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
# Spelling fixed ("Suppresors" -> "Suppressors") to match the Name2 label.
X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
|
||||
|
||||
|
||||
# Scatter of the GO terms whose Overlap label is not NA.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html")

X_abovethreshold <- X[!(is.na(X$Overlap)), ]

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Above-threshold scatter with every point labelled by its GO term name.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf"), width = 20, height = 20)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Rank the above-threshold terms by descending average Z in each data set
# (rank 1 = largest value). Ties are broken at random, so tied terms may be
# numbered differently from run to run.
# The ranks are assigned directly: pre-filling the column with a scalar NA
# fails on a zero-row data frame and was overwritten immediately anyway.
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random")
|
||||
|
||||
|
||||
# Above-threshold scatter with every point labelled by its X1 rank.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf"), width = 15, height = 15)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Above-threshold scatter with every point labelled by its X2 rank.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf"), width = 15, height = 15)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Export the full merged table, then the above-threshold subset, as CSV
# files without row names.
write.csv(
  x = X,
  file = paste0(outputpath, "All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"),
  row.names = FALSE
)
write.csv(
  x = X_abovethreshold,
  file = paste0(outputpath, "AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"),
  row.names = FALSE
)
|
||||
@@ -0,0 +1,374 @@
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
|
||||
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
|
||||
#Arg1 is Average_GOTerms_All_1.csv
|
||||
#Arg2 is the name to give GTF results 1
|
||||
#Arg3 is Average_GOTerms_All2.csv
|
||||
#Arg4 is the name to give GTF results 2
|
||||
#Arg5 is the directory to put the files into
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
# Read the five positional command-line arguments and prepare the output
# directory.
Args <- commandArgs(TRUE)

# Fail early with a usage hint; a missing argument would otherwise become NA
# and only fail later, far from the cause.
if (length(Args) < 5) {
  stop("Expected 5 arguments: <GTF results 1 csv> <name 1> <GTF results 2 csv> <name 2> <output directory>",
       call. = FALSE)
}

#Arg 1 is the GTF results 1
input_file1 <- Args[1]

#Arg 2 is the name of GTF results 1 to print in the results
Name1 <- Args[2]

#Arg 3 is the GTF results 2
input_file2 <- Args[3]

#Arg 4 is the name of GTF results 2 to print in the results
Name2 <- Args[4]

#Arg 5 is the directory to put the results into (created if needed)
subDir <- Args[5]

if (!file.exists(subDir)) {
  # recursive = TRUE so a nested output directory can be created in one call
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output path (fifth Rscript argument). The rest of the script builds file
# names by pasting them directly onto outputpath, so make sure it ends in a
# path separator; without one the files land beside the directory instead of
# inside it.
outputpath <- subDir
if (!grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
|
||||
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# Plot theme with the legend laid out horizontally below the panel.
#
# base_size:   base font size passed on to theme_foundation()
# base_family: base font family passed on to theme_foundation()
#
# Returns theme_foundation() with the overrides below added on top.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    # titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # borderless backgrounds
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # legend along the bottom
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale built from a fixed nine-colour palette.
#
# ...: further arguments passed on to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() is namespace-qualified instead of calling library(scales)
  # inside the function, which attached the package as a hidden side effect.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale built from a fixed nine-colour palette.
#
# ...: further arguments passed on to discrete_scale().
scale_colour_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
|
||||
# Plot theme with the legend stacked vertically to the right of the panel.
#
# base_size:   base font size passed on to theme_foundation()
# base_family: base font family passed on to theme_foundation()
#
# Returns theme_foundation() with the overrides below added on top.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    # titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # borderless backgrounds
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # light major grid only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # legend on the right-hand side
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    # margins and facet strips
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale built from a fixed nine-colour palette.
# NOTE(review): this redefines a function of the same name declared earlier
# in the script; this later definition is the one in effect afterwards.
#
# ...: further arguments passed on to discrete_scale().
scale_fill_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale built from a fixed nine-colour palette.
# NOTE(review): this redefines a function of the same name declared earlier
# in the script; this later definition is the one in effect afterwards.
#
# ...: further arguments passed on to discrete_scale().
scale_colour_Publication <- function(...){
  # manual_pal() belongs to the scales package, which the library() calls at
  # the top of this script do not attach; qualify it so the call resolves.
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Load the two GTF result sheets given on the command line.
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)

#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)

X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)

#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)

#Name1 <- "DOXO_HLD"
#Name2 <- "DOXO_HLEG"

plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")

# Outer join on the GO term: terms found in only one sheet are kept, with NA
# in the other sheet's columns. Shared column names get _X1 / _X2 suffixes.
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
# Classify each GO term from its average Z scores alone, using |Z| >= 2 as
# the cut-off. which() drops rows where a comparison is NA.
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]

# Label the terms. Order matters: the "specific" sets also contain terms that
# act in opposite directions in the two data sets, and the last two
# assignments relabel those.
# The column is indexed directly, which is simply a no-op for an empty set;
# the previous try(X[rows, ]$col <- ...) form errored on empty sets and hid
# every other error as well.
X$Overlap_Avg <- NA_character_
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
# Spelling fixed ("Suppresors" -> "Suppressors") to match the Name2 label.
X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
|
||||
|
||||
|
||||
# Scatter of every GO term, coloured by its average-score overlap class.
plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html")

# Term, Genes, NumGenes, AllPossibleGenes, SD_1 and SD_2 are not ggplot2
# aesthetics; presumably they are mapped so ggplotly() shows them on hover.
gg <- ggplot(
  data = X,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf"), width = 12, height = 8)
# Explicit print(): a bare `gg` is only drawn when auto-printed at top level,
# so the PDF would come out empty if this script were source()d.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
# Same scatter plot as the "All" version, restricted to GO terms for which both
# data sets report at least two genes (NumGenes_Avg_X1 and NumGenes_Avg_X2 >= 2).
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]

plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"
)

gg <- ggplot(
  data = x_rem2_gene,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf"),
  width = 12, height = 8
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Scatter plot limited to GO terms that received an Overlap_Avg label (rows
# whose Overlap_Avg is not NA). Written as a PDF and as a plotly HTML widget.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_byOverlap.html"
)

X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]

gg <- ggplot(
  data = X_overlap_nothresold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf"),
  width = 12, height = 8
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
|
||||
|
||||
# Stricter classification: a GO term only counts when its average score stays
# beyond the +/-2 cut-off after moving one SD towards zero.
#   Z1 holds mean - SD (used to call aggravators, cut-off >= 2)
#   Z2 holds mean + SD (used to call alleviators, cut-off <= -2)
# Both frames store the bound under the same column names, L_Subtract_SD_X1/X2,
# even though Z2 holds a sum.
sd_lower_x1 <- X$Z_lm_L_Avg_X1 - X$Z_lm_L_SD_X1
sd_lower_x2 <- X$Z_lm_L_Avg_X2 - X$Z_lm_L_SD_X2
sd_upper_x1 <- X$Z_lm_L_Avg_X1 + X$Z_lm_L_SD_X1
sd_upper_x2 <- X$Z_lm_L_Avg_X2 + X$Z_lm_L_SD_X2

Z1 <- X
Z1$L_Subtract_SD_X1 <- sd_lower_x1
Z1$L_Subtract_SD_X2 <- sd_lower_x2

Z2 <- X
Z2$L_Subtract_SD_X1 <- sd_upper_x1
Z2$L_Subtract_SD_X2 <- sd_upper_x2

# which() drops rows whose comparison is NA.
# Enhancer (aggravator) in one data set only.
X1_Specific_Aggravators2 <- Z1[which(sd_lower_x1 >= 2 & sd_lower_x2 < 2), ]
X2_Specific_Aggravators2 <- Z1[which(sd_lower_x2 >= 2 & sd_lower_x1 < 2), ]

# Suppressor (alleviator) in one data set only.
X1_Specific_Alleviators2 <- Z2[which(sd_upper_x1 <= -2 & sd_upper_x2 > -2), ]
X2_Specific_Alleviators2 <- Z2[which(sd_upper_x2 <= -2 & sd_upper_x1 > -2), ]

# Same direction in both data sets.
Overlap_Aggravators2 <- Z1[which(sd_lower_x1 >= 2 & sd_lower_x2 >= 2), ]
Overlap_Alleviators2 <- Z2[which(sd_upper_x2 <= -2 & sd_upper_x1 <= -2), ]

# Opposite directions in the two data sets.
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(sd_lower_x2 >= 2 & sd_upper_x1 <= -2), ]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(sd_upper_x2 <= -2 & sd_lower_x1 >= 2), ]
|
||||
|
||||
# Label each GO term in X with the SD-adjusted overlap class (column Overlap);
# terms in none of the class tables keep NA. The assignments run in a fixed
# order, so a term present in several tables ends up with the last label.
#
# Indexed assignment `X[mask, "col"] <- value` is a no-op when no row matches,
# so the "replacement has 1 row, data has 0" error that
# `X[mask, ]$col <- value` raises on an empty selection (previously hidden by
# try()) can no longer occur.
#
# The Name1 suppressor label used to be spelled "Suppresors"; it now matches the
# spelling used for Name2.
X$Overlap <- NA

X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, "Overlap"] <-
  paste(Name1, "Specific_Deletion_Enhancers", sep = "_")
X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, "Overlap"] <-
  paste(Name1, "Specific_Deletion_Suppressors", sep = "_")
X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, "Overlap"] <-
  paste(Name2, "Specific_Deletion_Enhancers", sep = "_")
X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, "Overlap"] <-
  paste(Name2, "Specific_Deletion_Suppressors", sep = "_")
X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, "Overlap"] <-
  "Overlapping_Deletion_Enhancers"
X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, "Overlap"] <-
  "Overlapping_Deletion_Suppressors"
X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, "Overlap"] <-
  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_")
X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, "Overlap"] <-
  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_")
|
||||
|
||||
|
||||
# Scatter plot of the GO terms that received an SD-adjusted Overlap label (rows
# of X whose Overlap is not NA). Written as a PDF and as a plotly HTML widget.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"
)

X_abovethreshold <- X[!(is.na(X$Overlap)), ]

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf"),
  width = 12, height = 8
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Above-threshold scatter plot with every point annotated with its GO term name
# (Term_Avg). Uses a larger 20x20 page; written as PDF and plotly HTML widget.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"
)

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Term name drawn slightly above each point.
  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf"),
  width = 20, height = 20
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Rank the above-threshold GO terms by descending average Z score in each data
# set (rank 1 = largest score). Ties are broken at random, so tied terms may
# swap ranks between runs unless the RNG seed is fixed.
# The former `<- NA` pre-initialisation was overwritten immediately and raised
# an error when X_abovethreshold had no rows, so it has been dropped.
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random")
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random")
|
||||
|
||||
|
||||
# Above-threshold scatter plot with every point annotated with its X1_Rank
# value. Written as a 15x15 PDF and as a plotly HTML widget.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"
)

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Rank number drawn slightly above each point.
  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf"),
  width = 15, height = 15
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
|
||||
# Above-threshold scatter plot with every point annotated with its X2_Rank
# value. Written as a 15x15 PDF and as a plotly HTML widget.
plotly_path <- paste0(
  getwd(), "/", outputpath,
  "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"
)

gg <- ggplot(
  data = X_abovethreshold,
  aes(
    x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap,
    # Extra, non-standard aesthetics; presumably carried along so ggplotly()
    # can show them in the hover text -- TODO confirm.
    Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1,
    AllPossibleGenes = AllPossibleGenes_Avg_X1,
    SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2
  )
) +
  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
  # Rank number drawn slightly above each point.
  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
  # Outline of the |Z| < 2 square on both axes.
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA
  ) +
  geom_point(shape = 3, size = 3) +
  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
  theme_Publication_legend_right()

pdf(
  paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf"),
  width = 15, height = 15
)
# Explicit print(): a bare `gg` only draws through top-level autoprint, which
# does not happen under source() or inside a loop/function, leaving an empty PDF.
print(gg)
dev.off()

pgg <- ggplotly(gg)
saveWidget(pgg, plotly_path, selfcontained = TRUE)
|
||||
|
||||
# Export the full score table and the above-threshold subset as CSV files next
# to the plots (outputpath is used as a plain filename prefix).
csv_suffix <- paste0(Name1, "_vs_", Name2, ".csv")
write.csv(
  x = X,
  file = paste0(outputpath, "All_GTF_Avg_Scores_", csv_suffix),
  row.names = FALSE
)
write.csv(
  x = X_abovethreshold,
  file = paste0(outputpath, "AboveThreshold_GTF_Avg_Scores_", csv_suffix),
  row.names = FALSE
)
|
||||
@@ -0,0 +1,739 @@
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library("gplots")
|
||||
|
||||
# Command-line interface. Ten positional arguments are required:
#   1  CSV read into X1; the code below uses its OrfRep, Gene, Z_lm_L and
#      Z_lm_K columns
#   2  display name for data set 1 (used in heatmap column names)
#   3  CSV read into X2 (same columns as argument 1)
#   4  display name for data set 2
#   5  CSV read into X4 (same columns as argument 1)
#   6  display name for data set 3
#   7  gene ontology .obo file
#      (www.geneontology.org/ontology/gene_ontology_edit.obo)
#   8  go_terms.tab file
#   9  CSV file listing the parent GO terms to plot; it is read with read.csv()
#      further down, column 1 being the numeric GO id and column 2 the term
#      name. Example ids: chromatin organization GO:0006325, biological process
#      GO:0008150, molecular function GO:0003674, cellular component GO:0005575
#  10  output directory (created if missing). It is pasted in front of the
#      output file names as-is, so it should end with "/".
Args <- commandArgs(TRUE)

if (length(Args) < 10) {
  stop("Expected 10 command-line arguments but received ", length(Args), call. = FALSE)
}

input_file1 <- Args[1]
Name1 <- Args[2]

input_file2 <- Args[3]
Name2 <- Args[4]

input_file3 <- Args[5]
Name3 <- Args[6]

ontology_obo_input <- Args[7]
GOtermstab_file <- Args[8]

# Despite its name this holds the path of the GO term list CSV (argument 9).
GO_ID_Arg <- Args[9]

subDir <- Args[10]

# recursive = TRUE so that a nested output path can be created in one go.
if (!file.exists(subDir)) {
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output path prefix (tenth argument).
outputpath <- subDir
|
||||
|
||||
|
||||
|
||||
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
|
||||
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
|
||||
|
||||
|
||||
# if (file.exists(outputpath_X1_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X1_rank))
|
||||
# }
|
||||
#
|
||||
# if (file.exists(outputpath_X2_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X2_rank))
|
||||
# }
|
||||
#
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# ggplot2 theme for publication-style figures: ggthemes::theme_foundation()
# plus the overrides below, with a horizontal legend placed under the plot.
#   base_size   base font size handed to theme_foundation()
#   base_family base font family handed to theme_foundation()
# Returns the combined theme object.
theme_Publication <- function(base_size=14, base_family="sans") {
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette. Extra
# arguments are forwarded to ggplot2::discrete_scale().
# manual_pal() is called through the scales namespace instead of attaching the
# package with library() inside the function, which altered the search path as
# a side effect of the first call.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette. Extra
# arguments are forwarded to ggplot2::discrete_scale().
# manual_pal() is namespace-qualified: none of the library() calls at the top
# of this script attaches scales, so the bare name would not be found.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
|
||||
# ggplot2 theme for publication-style figures: ggthemes::theme_foundation()
# plus the overrides below, with a vertical legend to the right of the plot.
#   base_size   base font size handed to theme_foundation()
#   base_family base font family handed to theme_foundation()
# Returns the combined theme object.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  pale_grey <- "#f0f0f0"
  no_outline <- element_rect(colour = NA)

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = pale_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) + overrides
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette. Extra
# arguments are forwarded to ggplot2::discrete_scale().
# NOTE(review): this re-defines a function of the same name declared earlier in
# the script; this later definition is the one in effect.
# manual_pal() is namespace-qualified: none of the library() calls at the top
# of this script attaches scales, so the bare name would not be found.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette. Extra
# arguments are forwarded to ggplot2::discrete_scale().
# NOTE(review): this re-defines a function of the same name declared earlier in
# the script; this later definition is the one in effect.
# manual_pal() is namespace-qualified: none of the library() calls at the top
# of this script attaches scales, so the bare name would not be found.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Load the three score tables (arguments 1, 3 and 5). Strings stay character.
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)

X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)

X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)

#X5 <- read.csv(file = input_file4,stringsAsFactors=FALSE,header = TRUE)
# X6 <- read.csv(file = input_file5,stringsAsFactors=FALSE,header = TRUE)

# Table of parent GO terms to plot (argument 9).
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)

# Column 1: turn the id into a "GO:" identifier zero-padded to 7 characters.
X3[[1]] <- paste0("GO:", formatC(X3[[1]], width = 7, flag = "0"))
# Column 2: replace spaces and slashes with underscores (the value is later
# used as a PDF file name).
X3[[2]] <- gsub(pattern = "[ /]", replacement = "_", x = X3[[2]])
|
||||
|
||||
#Name1 <- "DOXO_HLD"
|
||||
#Name2 <- "DOXO_HLEG"
|
||||
|
||||
|
||||
# Parse the GO .obo file (www.geneontology.org/ontology/gene_ontology_edit.obo),
# propagating "is_a" relationships and extracting the minimal tag set.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# org.Sc.sgdGO2ALLORFS converted to a plain named list (indexed by GO id below).
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
# GO term list: header-less, tab-delimited file read with quoting disabled;
# the four columns are named here.
go_term_columns <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
|
||||
# Add three columns to a score table and return it:
#   ORF     OrfRep with the substrings "_1", "_2", "_3", "_4" removed
#   Score_L call based on Z_lm_L: NA -> "No Growth", >= 2 -> "Deletion
#           Enhancer", <= -2 -> "Deletion Suppressor", else "No Effect"
#   Score_K call based on Z_lm_K with the direction inverted: NA -> "No
#           Growth", >= 2 -> "Deletion Suppressor", <= -2 -> "Deletion
#           Enhancer", else "No Effect"
# `scores` must provide the columns OrfRep, Z_lm_L and Z_lm_K.
#
# Values are assigned with `scores$col[mask] <- value`, which is a no-op for an
# all-FALSE mask. The earlier `scores[mask, ]$col <- value` form stopped the
# script with "replacement has 1 row, data has 0" whenever a category was
# empty (for instance a table without any NA score).
annotate_interaction_scores <- function(scores) {
  scores$ORF <- scores$OrfRep
  # NOTE(review): the patterns are not anchored, so "_1" is removed wherever it
  # occurs in the name, not only at the end -- confirm that is intended.
  for (replicate_suffix in c("_1", "_2", "_3", "_4")) {
    scores$ORF <- gsub(replicate_suffix, "", x = scores$ORF)
  }

  has_l <- !is.na(scores$Z_lm_L)
  scores$Score_L <- "No Effect"
  scores$Score_L[!has_l] <- "No Growth"
  scores$Score_L[has_l & scores$Z_lm_L >= 2] <- "Deletion Enhancer"
  scores$Score_L[has_l & scores$Z_lm_L <= -2] <- "Deletion Suppressor"

  has_k <- !is.na(scores$Z_lm_K)
  scores$Score_K <- "No Effect"
  scores$Score_K[!has_k] <- "No Growth"
  scores$Score_K[has_k & scores$Z_lm_K >= 2] <- "Deletion Suppressor"
  scores$Score_K[has_k & scores$Z_lm_K <= -2] <- "Deletion Enhancer"

  scores
}

X1 <- annotate_interaction_scores(X1)
X2 <- annotate_interaction_scores(X2)
X4 <- annotate_interaction_scores(X4)
|
||||
#
|
||||
# X5$ORF <- X5$OrfRep
|
||||
# X5$ORF <- gsub("_1","",x=X5$ORF)
|
||||
# X5$ORF <- gsub("_2","",x=X5$ORF)
|
||||
# X5$ORF <- gsub("_3","",x=X5$ORF)
|
||||
# X5$ORF <- gsub("_4","",x=X5$ORF)
|
||||
#
|
||||
# X5$Score_L <- "No Effect"
|
||||
# X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth"
|
||||
# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
|
||||
# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
|
||||
#
|
||||
# X5$Score_K <- "No Effect"
|
||||
# X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth"
|
||||
# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
|
||||
# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
|
||||
|
||||
|
||||
# X6$ORF <- X6$OrfRep
|
||||
# X6$ORF <- gsub("_1","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_2","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_3","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_4","",x=X6$ORF)
|
||||
#
|
||||
# X6$Score_L <- "No Effect"
|
||||
# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
|
||||
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
|
||||
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
|
||||
|
||||
# X6$Score_K <- "No Effect"
|
||||
# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
|
||||
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
|
||||
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
|
||||
|
||||
# Express missing Z scores as 0.001 for both K and L in X1, X2 and X4.
# `df$col[is.na(df$col)] <- value` is a no-op when nothing is missing; the
# earlier `df[is.na(df$col), ]$col <- value` form stopped the script with
# "replacement has 1 row, data has 0" for a column without any NA.
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001

#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001

X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001

#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001

X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
|
||||
|
||||
|
||||
#express the na data as 0.001 in X5
|
||||
# X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
# X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
|
||||
#express the na data as 0.001 in X6
|
||||
# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
|
||||
|
||||
# Ascending rank of the L and K Z scores within each data set, using rank()'s
# default tie handling.
X1[["Rank_L"]] <- rank(X1[["Z_lm_L"]])
X1[["Rank_K"]] <- rank(X1[["Z_lm_K"]])

X2[["Rank_L"]] <- rank(X2[["Z_lm_L"]])
X2[["Rank_K"]] <- rank(X2[["Z_lm_K"]])

X4[["Rank_L"]] <- rank(X4[["Z_lm_L"]])
X4[["Rank_K"]] <- rank(X4[["Z_lm_K"]])
|
||||
|
||||
# X5$Rank_L <- rank(X5$Z_lm_L)
|
||||
# X5$Rank_K <- rank(X5$Z_lm_K)
|
||||
|
||||
|
||||
# X6$Rank_L <- rank(X6$Z_lm_L)
|
||||
# X6$Rank_K <- rank(X6$Z_lm_K)
|
||||
|
||||
# Sort every table by OrfRep (ascending) so that rows line up by position.
X1 <- X1[order(X1$OrfRep), ]
X2 <- X2[order(X2$OrfRep), ]
X4 <- X4[order(X4$OrfRep), ]
#X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
#X6 <- X6[order(X6$OrfRep,decreasing = FALSE),]

# Tag every column with its data-set suffix so the names stay unique once the
# tables are placed side by side.
names(X1) <- paste0(names(X1), "_X1")
names(X2) <- paste0(names(X2), "_X2")
names(X4) <- paste0(names(X4), "_X4")
|
||||
#colnames(X5) <- paste(colnames(X5),"_X5",sep="")
|
||||
#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
|
||||
# colnames(X1)[1] <- "OrfRep"
|
||||
# colnames(X2)[1] <- "OrfRep"
|
||||
# colnames(X4)[1] <- "OrfRep"
|
||||
# colnames(X5)[1] <- "OrfRep"
|
||||
# colnames(X6)[1] <- "OrfRep"
|
||||
|
||||
# Place the three tables side by side. cbind() pairs rows purely by position,
# so the OrfRep keys must be identical across the tables; warn when they are
# not, because the combined rows would then mix values from different ORFs.
if (!(identical(X1$OrfRep_X1, X2$OrfRep_X2) && identical(X1$OrfRep_X1, X4$OrfRep_X4))) {
  warning("OrfRep values differ between the input tables; rows of X may combine different ORFs",
          call. = FALSE)
}
X <- cbind(X1,X2,X4)
|
||||
#print(dim(X))
|
||||
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
|
||||
#print(X[2700,])
|
||||
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
|
||||
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
|
||||
# ORF id for the combined table: OrfRep of the first data set with the
# substrings "_1", "_2", "_3", "_4" removed.
X$ORF <- X$OrfRep_X1

#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)

# NOTE(review): the patterns are not anchored, so "_1" is removed wherever it
# occurs in the name, not only at the end -- confirm that is intended.
for (replicate_suffix in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_suffix, "", x = X$ORF)
}

# Where the gene name is an empty string, fall back to the OrfRep value of the
# same data set. which() ignores NA gene names and an empty selection is simply
# skipped; previously either case raised an error that try() swallowed, and in
# the NA case the empty names were then left unfilled.
for (dataset_suffix in c("X1", "X2", "X4")) {
  gene_col <- paste0("Gene_", dataset_suffix)
  orf_col <- paste0("OrfRep_", dataset_suffix)
  blank_rows <- which(X[[gene_col]] == "")
  if (length(blank_rows) > 0) {
    X[blank_rows, gene_col] <- X[blank_rows, orf_col]
  }
}
|
||||
#try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
|
||||
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
|
||||
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
|
||||
#
|
||||
# #express the na data as 0.001
|
||||
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
|
||||
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
|
||||
#
|
||||
# X$Overlap <- "No Effect"
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
#
|
||||
# X$Overlap_K <- "No Effect"
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
|
||||
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
|
||||
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
|
||||
#
|
||||
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
|
||||
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
|
||||
|
||||
# Columns kept for the heatmaps. Names that are absent from X (the X5 ones)
# simply select nothing; the kept columns stay in X's own column order.
heatmap_columns <- c(
  "ORF", "Gene_X1",
  "Z_Shift_K_X1", "Z_lm_K_X1", "Z_Shift_K_X2", "Z_lm_K_X2",
  "Z_Shift_K_X4", "Z_lm_K_X4", "Z_Shift_K_X5", "Z_lm_K_X5",
  "Z_Shift_L_X1", "Z_lm_L_X1", "Z_Shift_L_X2", "Z_lm_L_X2",
  "Z_Shift_L_X4", "Z_lm_L_X4", "Z_Shift_L_X5", "Z_lm_L_X5"
)
X_heatmap <- X[colnames(X) %in% heatmap_columns]
#print(colnames(X_heatmap))
#break()

# Positional reorder: column 14 first, then column 1, then the score columns
# regrouped. NOTE(review): this relies on exactly 14 columns having been kept
# and on their order in the input files -- confirm against the input layout.
X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)]

# Replace the internal data-set tags in the column names by the display names.
heatmap_names <- colnames(X_heatmap)
heatmap_names <- gsub(pattern = "X1", replacement = Name1, heatmap_names)
heatmap_names <- gsub(pattern = "X2", replacement = Name2, heatmap_names)
heatmap_names <- gsub(pattern = "X4", replacement = Name3, heatmap_names)
colnames(X_heatmap) <- heatmap_names
#colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name4,colnames(X_heatmap))
#colnames(X_heatmap) <- gsub(pattern = "X6",replacement = Name5,colnames(X_heatmap))

colnames(X_heatmap)[2] <- "Gene"

# Heatmap colour breaks: steps of 2 from -12 to 12, with a single bin spanning
# -2 to 2.
colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))
|
||||
|
||||
for(s in 1:dim(X3)[1]){
|
||||
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
|
||||
#GO_ID_Arg <- "GO:0006325"
|
||||
GO_ID_Arg_loop <- as.character(X3[s,1])
|
||||
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
|
||||
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
|
||||
#only make plots if get_descendants() returned at most 100 terms for the parent (parents with more than 100 are skipped)
|
||||
if(length(GOTerm_parent) > 100){
|
||||
#print(length(GOTerm_parent))
|
||||
next()
|
||||
}
|
||||
|
||||
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
if(Parent_Size < 2){
|
||||
next()
|
||||
}
|
||||
if(Parent_Size > 2000){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
if(Parent_Size >= 1000 && Parent_Size <= 2000){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 500 && Parent_Size <= 1000){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 200 && Parent_Size <= 500){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 100 && Parent_Size <= 200){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 60 && Parent_Size <= 100){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 30 && Parent_Size <= 60){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 3 && Parent_Size <= 30){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
if(Parent_Size == 2){
|
||||
pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,729 @@
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library("gplots")
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
# Command-line interface. Twelve positional arguments are required:
#    1  input_file1         CSV of interaction scores for dataset 1
#    2  Name1               label for dataset 1 in the results
#    3  input_file2         CSV of interaction scores for dataset 2
#    4  Name2               label for dataset 2
#    5  input_file3         CSV of interaction scores for dataset 3
#    6  Name3               label for dataset 3
#    7  input_file4         CSV of interaction scores for dataset 4
#    8  Name4               label for dataset 4
#    9  ontology_obo_input  gene_ontology_edit.obo file
#                           (www.geneontology.org/ontology/gene_ontology_edit.obo)
#   10  GOtermstab_file     go_terms.tab file
#   11  GO_ID_Arg           CSV of parent GO terms: column 1 holds the numeric
#                           GO ID (it is later zero-padded and prefixed with
#                           "GO:"), column 2 a label for the term
#   12  subDir              directory for the results (created if missing)
# The score CSVs are later expected to provide OrfRep, Z_lm_L and Z_lm_K
# columns.
Args <- commandArgs(TRUE)

# Fail early with a clear message instead of an obscure error on an NA path.
if (length(Args) < 12) {
  stop(
    "Expected 12 arguments (4 x [score CSV, label], ontology .obo, ",
    "go_terms.tab, GO term CSV, output directory) but received ",
    length(Args), ".",
    call. = FALSE
  )
}

input_file1 <- Args[1]
Name1 <- Args[2]

input_file2 <- Args[3]
Name2 <- Args[4]

input_file3 <- Args[5]
Name3 <- Args[6]

input_file4 <- Args[7]
Name4 <- Args[8]

ontology_obo_input <- Args[9]
GOtermstab_file <- Args[10]
GO_ID_Arg <- Args[11]

# Results directory: create it (including missing parents) when absent.
subDir <- Args[12]
if (!file.exists(subDir)) {
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output path for everything written by this script (argument 12).
outputpath <- subDir
|
||||
|
||||
|
||||
|
||||
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
|
||||
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
|
||||
|
||||
|
||||
# if (file.exists(outputpath_X1_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X1_rank))
|
||||
# }
|
||||
#
|
||||
# if (file.exists(outputpath_X2_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X2_rank))
|
||||
# }
|
||||
#
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# ggplot2 theme for publication-style figures with the legend placed
# horizontally underneath the plot.
#   base_size   - base font size handed to theme_foundation()
#   base_family - base font family handed to theme_foundation()
# Returns the foundation theme with the overrides below added to it.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(
    base_size = base_size,
    base_family = base_family
  )

  no_outline <- element_rect(colour = NA)
  light_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
#   ... - further arguments forwarded to discrete_scale()
# manual_pal() is referenced through the scales namespace so the function
# neither attaches a package as a side effect nor depends on scales having
# been attached by the caller.
scale_fill_Publication <- function(...){
  publication_colours <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale(
    "fill", "Publication",
    scales::manual_pal(values = publication_colours),
    ...
  )
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
#   ... - further arguments forwarded to discrete_scale()
# manual_pal() lives in the scales package, which this script never attaches
# at top level; it is therefore referenced by namespace so the call cannot
# fail with "could not find function".
scale_colour_Publication <- function(...){
  publication_colours <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale(
    "colour", "Publication",
    scales::manual_pal(values = publication_colours),
    ...
  )
}
|
||||
|
||||
|
||||
# Variant of the publication theme with a vertical legend on the right-hand
# side of the plot and larger legend keys.
#   base_size   - base font size handed to theme_foundation()
#   base_family - base font family handed to theme_foundation()
# Returns the foundation theme with the overrides below added to it.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(
    base_size = base_size,
    base_family = base_family
  )

  no_outline <- element_rect(colour = NA)
  light_grey <- "#f0f0f0"

  overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = no_outline,
    plot.background = no_outline,
    panel.border = no_outline,
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = no_outline,
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )

  (foundation + overrides)
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
#   ... - further arguments forwarded to discrete_scale()
# NOTE: this repeats a definition made earlier in this file and, being the
# later one, is the definition in effect. manual_pal() is referenced through
# the scales namespace because the package is not attached at top level.
scale_fill_Publication <- function(...){
  publication_colours <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale(
    "fill", "Publication",
    scales::manual_pal(values = publication_colours),
    ...
  )
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
#   ... - further arguments forwarded to discrete_scale()
# NOTE: this repeats a definition made earlier in this file and, being the
# later one, is the definition in effect. manual_pal() is referenced through
# the scales namespace because the package is not attached at top level.
scale_colour_Publication <- function(...){
  publication_colours <- c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )
  discrete_scale(
    "colour", "Publication",
    scales::manual_pal(values = publication_colours),
    ...
  )
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Interaction score tables, one per dataset. The objects are named X1, X2, X4
# and X5 (there is no score table called X3; that name is used for the GO term
# list below).
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
X5 <- read.csv(input_file4, header = TRUE, stringsAsFactors = FALSE)

# Parent GO terms to report on. Column 1 is turned into a full identifier
# ("GO:" followed by the value zero-padded to 7 characters); in column 2 every
# space and slash becomes an underscore.
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
X3[, 1] <- paste0("GO:", formatC(X3[, 1], width = 7, flag = "0"))
X3[, 2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[, 2])

# Ontology graph read from the .obo file, following "is_a" relationships and
# extracting the minimal tag set.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# GO ID -> ORFs lookup as a plain list, taken from org.Sc.sgd.db.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# GO term table: headerless, tab-delimited, quoting disabled.
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Per-dataset preparation of the interaction score tables X1, X2, X4 and X5.
#
# The same steps were previously written out four times using the form
#   df[cond, ]$col <- value
# which stops with "replacement has 1 row, data has 0" whenever `cond` selects
# no rows (for example a dataset without any NA Z-score). The helpers below
# assign through a logical index on the column instead, which is a no-op when
# nothing matches.
# (A fifth dataset, X6, is currently disabled.)
# ---------------------------------------------------------------------------

# Remove the replicate tags "_1".."_4" from ORF identifiers.
# NOTE(review): as before, the tags are removed wherever they occur, not only
# at the end of the identifier; confirm that no identifier contains them
# elsewhere.
strip_replicate_tags <- function(orf) {
  for (tag in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(tag, "", x = orf, fixed = TRUE)
  }
  orf
}

# Turn a Z-score vector into effect labels.
#   z          - numeric vector, may contain NA
#   high_label - label for z >= 2
#   low_label  - label for z <= -2
# NA becomes "No Growth"; everything else is "No Effect".
label_interaction <- function(z, high_label, low_label) {
  labels <- rep("No Effect", length(z))
  labels[is.na(z)] <- "No Growth"
  labels[!is.na(z) & z >= 2] <- high_label
  labels[!is.na(z) & z <= -2] <- low_label
  labels
}

# Prepare one score table for merging.
#   scores - data frame with OrfRep, Z_lm_L and Z_lm_K columns
#   suffix - dataset tag appended to every column name (e.g. "_X1")
# Returns the table with ORF, Score_L and Score_K columns added, NA Z-scores
# replaced by the sentinel 0.001, rows ordered by OrfRep, all columns
# suffixed, and the first column named "OrfRep" to serve as the merge key.
prepare_interaction_table <- function(scores, suffix) {
  required <- c("OrfRep", "Z_lm_L", "Z_lm_K")
  absent <- setdiff(required, colnames(scores))
  if (length(absent) > 0) {
    stop(
      "Score table for ", suffix, " lacks column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  scores$ORF <- strip_replicate_tags(scores$OrfRep)

  # The sign convention is opposite for L and K: a high L Z-score is labelled
  # an enhancer, a high K Z-score a suppressor.
  scores$Score_L <- label_interaction(
    scores$Z_lm_L, "Deletion Enhancer", "Deletion Suppressor"
  )
  scores$Score_K <- label_interaction(
    scores$Z_lm_K, "Deletion Suppressor", "Deletion Enhancer"
  )

  # Express NA Z-scores as 0.001 (done after labelling so that "No Growth"
  # is still detected).
  scores$Z_lm_L[is.na(scores$Z_lm_L)] <- 0.001
  scores$Z_lm_K[is.na(scores$Z_lm_K)] <- 0.001

  scores <- scores[order(scores$OrfRep, decreasing = FALSE), ]

  colnames(scores) <- paste0(colnames(scores), suffix)
  # Assumes the first column of the input file is OrfRep, as before.
  colnames(scores)[1] <- "OrfRep"
  scores
}

X1 <- prepare_interaction_table(X1, "_X1")
X2 <- prepare_interaction_table(X2, "_X2")
X4 <- prepare_interaction_table(X4, "_X4")
X5 <- prepare_interaction_table(X5, "_X5")
|
||||
# colnames(X6)[1] <- "OrfRep"
|
||||
|
||||
#X <- cbind(X1,X2,X4,X5)
|
||||
#print(dim(X))
|
||||
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
|
||||
#print(X[2700,])
|
||||
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
|
||||
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
|
||||
# Full outer join of the four prepared score tables on OrfRep, so an ORF that
# is missing from some datasets still gets a row (with NA in those datasets'
# columns).
X <- Reduce(
  function(left, right) merge(left, right, by = "OrfRep", all = TRUE),
  list(X1, X2, X4, X5)
)
X$ORF <- X$OrfRep

print(dim(X))
print(colnames(X))

# ORF identifier without the replicate tags "_1".."_4".
for (replicate_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(replicate_tag, "", x = X$ORF)
}

# Fill the gaps introduced by the outer join.
# Values are assigned through a logical index on the column; the previous
# `X[cond, ]$col <- value` form stops with "replacement has 1 row, data has 0"
# when no row matches, which is also why the Gene fixes had to be wrapped in
# try().
for (dataset_tag in c("_X1", "_X2", "_X4", "_X5")) {
  # NA Z-scores are expressed as the sentinel 0.001.
  for (z_column in paste0(c("Z_lm_L", "Z_lm_K"), dataset_tag)) {
    X[[z_column]][is.na(X[[z_column]])] <- 0.001
  }

  # A missing or empty gene name falls back to the OrfRep identifier. The
  # Gene column is optional, as it effectively was under try().
  gene_column <- paste0("Gene", dataset_tag)
  if (gene_column %in% colnames(X)) {
    unnamed <- is.na(X[[gene_column]]) | X[[gene_column]] == ""
    X[[gene_column]][unnamed] <- X$OrfRep[unnamed]
  }
}
|
||||
#write.csv(X,paste(outputpath,"18_0614_all_3.csv",sep=""))
|
||||
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
|
||||
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
|
||||
#
|
||||
# #express the na data as 0.001
|
||||
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
|
||||
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
|
||||
#
|
||||
# X$Overlap <- "No Effect"
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
#
|
||||
# X$Overlap_K <- "No Effect"
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
|
||||
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
|
||||
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
|
||||
#
|
||||
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
|
||||
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
|
||||
|
||||
print(head(X))

# Dump the merged table and each per-screen table next to the other outputs.
# outputpath is concatenated directly with the file name, so it is expected to
# end with a path separator.
csv_dumps <- list(
  "18_0614_all_2.csv" = X,
  "18_0614_all_X1.csv" = X1,
  "18_0614_all_X2.csv" = X2,
  "18_0614_all_X4.csv" = X4,
  "18_0614_all_X5.csv" = X5
)
for (csv_name in names(csv_dumps)) {
  write.csv(csv_dumps[[csv_name]], paste0(outputpath, csv_name))
}

# Keep only the identifier columns and the K/L Z-score columns of each screen,
# in the order in which they occur in X.
heatmap_columns <- c(
  "ORF", "Gene_X1",
  "Z_Shift_K_X1", "Z_lm_K_X1",
  "Z_Shift_K_X2", "Z_lm_K_X2",
  "Z_Shift_K_X4", "Z_lm_K_X4",
  "Z_Shift_K_X5", "Z_lm_K_X5",
  "Z_Shift_L_X1", "Z_lm_L_X1",
  "Z_Shift_L_X2", "Z_lm_L_X2",
  "Z_Shift_L_X4", "Z_lm_L_X4",
  "Z_Shift_L_X5", "Z_lm_L_X5"
)
X_heatmap <- X[colnames(X) %in% heatmap_columns]

print(colnames(X_heatmap))

# Positional reorder: column 18 first, then column 1, then the remaining
# score columns. NOTE(review): this relies on exactly 18 columns having been
# selected above, in the order produced by the merge -- confirm if the input
# layout changes.
column_order <- c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)
X_heatmap <- X_heatmap[, column_order]

# Replace the internal screen tags in the column names with the user-supplied
# screen names (applied in this order: X1, X2, X4, X5).
screen_labels <- c(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4)
for (screen_tag in names(screen_labels)) {
  colnames(X_heatmap) <- gsub(
    pattern = screen_tag,
    replacement = screen_labels[[screen_tag]],
    colnames(X_heatmap)
  )
}

colnames(X_heatmap)[2] <- "Gene"

# Colour breaks for the heatmaps: steps of 2 from -12 to 12 with a single
# wide bin spanning -2..2.
colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))

write.csv(X_heatmap, paste0(outputpath, "18_0614_all.csv"))
|
||||
#break()
|
||||
|
||||
# For every parent GO term listed in X3, write one multi-page PDF (named after
# X3[s, 2]) holding a clustered heatmap per descendant term.
#
# The PDF page height and the row-label size depend on how many ORFs are
# annotated to the parent term. The brackets are right-closed:
#   2..30 | 31..60 | 61..100 | 101..200 | 201..500 | 501..1000 | 1001..2000 | >2000
# Previously each bracket had its own copy of the plotting code with
# overlapping bounds (e.g. 1000 matched two brackets), so a boundary size
# rendered the same PDF twice and the later, smaller layout overwrote the
# first. The lookup below yields that surviving layout directly.
size_upper_bounds <- c(30, 60, 100, 200, 500, 1000, 2000)
pdf_heights <- c(7, 10, 15, 20, 25, 30, 35, 45)
row_label_sizes <- c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)

for (s in seq_len(nrow(X3))) {
  GO_ID_Arg_loop <- as.character(X3[s, 1])
  GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)

  # Only make plots if the parent term has at most 100 descendant terms.
  if (length(GOTerm_parent) > 100) {
    next
  }

  # Number of ORFs annotated to the parent term; skip terms with fewer than 2.
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  if (Parent_Size < 2) {
    next
  }

  size_bracket <- findInterval(Parent_Size, size_upper_bounds, left.open = TRUE) + 1

  pdf(file = paste(outputpath, X3[s, 2], ".pdf", sep = ""),
      width = 12, height = pdf_heights[size_bracket], onefile = TRUE)

  for (i in seq_along(GOTerm_parent)) {
    GO_Term <- GOTerm_parent[i]
    # Terms$GO_ID is matched against the numeric part of "GO:0000000".
    GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
    GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)

    All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
    Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]

    # Columns 1-2 are the ORF and Gene identifiers; the rest are Z-scores.
    X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])

    # Only terms with more than two genes in the data get a page.
    if (dim(Genes_Annotated_to_Term)[1] > 2) {
      # try(): a failing heatmap must not abort the remaining pages.
      try(heatmap.2(x = X0,
                    Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                    dendrogram = "row", cexCol = 0.7,
                    cexRow = row_label_sizes[size_bracket], scale = "none",
                    breaks = colormapbreaks, symbreaks = FALSE,
                    colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
                    ylab = "Gene",
                    cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
                    keysize = 0.5, trace = "none", density.info = c("none"),
                    margins = c(10, 8),
                    na.color = "red", col = brewer.pal(11, "PuOr"),
                    main = GO_Term_Name,
                    labRow = as.character(Genes_Annotated_to_Term$Gene)))
    }
  }

  dev.off()
}
|
||||
@@ -0,0 +1,720 @@
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library("gplots")
|
||||
|
||||
# Command-line interface: the script takes 12 positional arguments.
Args <- commandArgs(TRUE)
if (length(Args) < 12) {
  stop("Expected 12 command-line arguments but received ", length(Args),
       call. = FALSE)
}

# Arg 1 is the GTF results file 1
input_file1 <- Args[1]

# Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
Name1 <- Args[2]

# Arg 3 is the GTF results file 2
input_file2 <- Args[3]

# Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
Name2 <- Args[4]

# Arg 5 is the GTF results file 3
input_file3 <- Args[5]

# Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
Name3 <- Args[6]

# Arg 7 is the GTF results file 4
input_file4 <- Args[7]

# Arg 8 is the name of Interaction score file (Zscores_Interaction.csv) 4 to print in the results
Name4 <- Args[8]

# Arg 9 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
ontology_obo_input <- Args[9]

# Arg 10 is the go_terms.tab file
GOtermstab_file <- Args[10]

# Arg 11 is passed to read.csv() further down: a CSV whose first column holds
# numeric GO IDs and whose second column holds the names used for the output
# PDFs. Example GO IDs:
#   chromatin organization GO:0006325
#   all processes  -> biological process GO:0008150
#   all functions  -> molecular function GO:0003674
#   all components -> cellular component GO:0005575
GO_ID_Arg <- Args[11]

# Arg 12 is the directory to put the results into; it is created if needed.
# Output file names are pasted directly onto outputpath, so the value is
# expected to end with a path separator.
subDir <- Args[12]
outputpath <- subDir
if (!file.exists(outputpath)) {
  dir.create(file.path(outputpath), recursive = TRUE)
}
|
||||
|
||||
|
||||
|
||||
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
|
||||
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
|
||||
|
||||
|
||||
# if (file.exists(outputpath_X1_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X1_rank))
|
||||
# }
|
||||
#
|
||||
# if (file.exists(outputpath_X2_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X2_rank))
|
||||
# }
|
||||
#
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# Publication-style ggplot2 theme with the legend laid out horizontally
# below the plot.
#
# base_size:   base font size handed to theme_foundation().
# base_family: base font family handed to theme_foundation().
# Returns the foundation theme with the overrides below added to it.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  publication_overrides <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  foundation + publication_overrides
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
# Extra arguments in ... are forwarded to discrete_scale().
# manual_pal() is called through the scales namespace so that calling this
# function no longer attaches a package as a side effect.
scale_fill_Publication <- function(...){
  publication_palette <- scales::manual_pal(
    values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  )
  discrete_scale("fill", "Publication", publication_palette, ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
# Extra arguments in ... are forwarded to discrete_scale().
# manual_pal() is namespace-qualified because this script never attaches
# scales at top level, so the bare name is only found if something else
# happened to attach it first.
scale_colour_Publication <- function(...){
  publication_palette <- scales::manual_pal(
    values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  )
  discrete_scale("colour", "Publication", publication_palette, ...)
}
|
||||
|
||||
|
||||
# Variant of theme_Publication() with a vertical legend to the right of the
# plot and larger legend keys (0.5 cm instead of 0.2 cm).
#
# base_size:   base font size, forwarded to theme_Publication().
# base_family: base font family, forwarded to theme_Publication().
#
# Every other setting is identical to theme_Publication(), so this builds on
# it and overrides the three legend settings instead of repeating the full
# theme definition.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  theme_Publication(base_size = base_size, base_family = base_family) +
    theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.key.size = unit(0.5, "cm")
    )
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
# Extra arguments in ... are forwarded to discrete_scale().
# NOTE: this name is also defined earlier in the file; being later, this is
# the definition in effect. manual_pal() is namespace-qualified because this
# definition does not attach scales itself.
scale_fill_Publication <- function(...){
  publication_palette <- scales::manual_pal(
    values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  )
  discrete_scale("fill", "Publication", publication_palette, ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
# Extra arguments in ... are forwarded to discrete_scale().
# NOTE: this name is also defined earlier in the file; being later, this is
# the definition in effect. manual_pal() is namespace-qualified so it does
# not depend on scales being attached.
scale_colour_Publication <- function(...){
  publication_palette <- scales::manual_pal(
    values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
  )
  discrete_scale("colour", "Publication", publication_palette, ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Interaction-score tables for the four screens (command-line args 1, 3, 5, 7).
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
X4 <- read.csv(file = input_file3, stringsAsFactors = FALSE, header = TRUE)
X5 <- read.csv(file = input_file4, stringsAsFactors = FALSE, header = TRUE)

# GO terms to plot: column 1 holds numeric GO IDs, column 2 the term names.
X3 <- read.csv(file = GO_ID_Arg, stringsAsFactors = FALSE, header = TRUE)

# Rebuild the canonical "GO:0000000" identifier. format = "d" forces integer
# formatting; without it a column parsed as double can be rendered in
# scientific notation (e.g. "1.991e+06").
X3[, 1] <- paste0("GO:", formatC(X3[, 1], width = 7, flag = "0", format = "d"))

# Column 2 is used as the PDF file name, so spaces and slashes become "_".
X3[, 2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[, 2])
|
||||
|
||||
#Name1 <- "DOXO_HLD"
|
||||
#Name2 <- "DOXO_HLEG"
|
||||
|
||||
|
||||
#www.geneontology.org/ontology/gene_ontology_edit.obo file
|
||||
# Parse the .obo ontology file, following "is_a" relationships and keeping
# only the minimal set of tags.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# All ORFs associated with each GO term, as a list keyed by GO ID.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
#Terms is the GO term list
|
||||
# Header-less, tab-delimited GO term list; quoting is disabled so quote
# characters inside definitions are read literally.
go_term_columns <- c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
|
||||
# Add the ORF, Score_L and Score_K columns (in that order) to one screen table.
#
# screen: data frame with OrfRep, Z_lm_L and Z_lm_K columns.
# Returns the data frame with the three columns added.
#
# ORF is OrfRep with every literal "_1", "_2", "_3" and "_4" removed.
# Score_L / Score_K classify the Z score: NA -> "No Growth", |Z| >= 2 ->
# enhancer/suppressor, anything else -> "No Effect". The sign convention is
# inverted between L and K.
#
# Categories are assigned by indexing the column vector. The previous
# `df[rows, ]$col <- value` form raises an error whenever no row falls into a
# category (e.g. a table without any NA score).
classify_screen <- function(screen) {
  orf <- screen$OrfRep
  for (orf_suffix in c("_1", "_2", "_3", "_4")) {
    orf <- gsub(orf_suffix, "", x = orf, fixed = TRUE)
  }
  screen$ORF <- orf

  z_l <- screen$Z_lm_L
  screen$Score_L <- "No Effect"
  screen$Score_L[is.na(z_l)] <- "No Growth"
  screen$Score_L[!is.na(z_l) & z_l >= 2] <- "Deletion Enhancer"
  screen$Score_L[!is.na(z_l) & z_l <= -2] <- "Deletion Suppressor"

  z_k <- screen$Z_lm_K
  screen$Score_K <- "No Effect"
  screen$Score_K[is.na(z_k)] <- "No Growth"
  screen$Score_K[!is.na(z_k) & z_k >= 2] <- "Deletion Suppressor"
  screen$Score_K[!is.na(z_k) & z_k <= -2] <- "Deletion Enhancer"

  screen
}

X1 <- classify_screen(X1)
X2 <- classify_screen(X2)
X4 <- classify_screen(X4)
X5 <- classify_screen(X5)
|
||||
|
||||
|
||||
# X6$ORF <- X6$OrfRep
|
||||
# X6$ORF <- gsub("_1","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_2","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_3","",x=X6$ORF)
|
||||
# X6$ORF <- gsub("_4","",x=X6$ORF)
|
||||
#
|
||||
# X6$Score_L <- "No Effect"
|
||||
# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
|
||||
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
|
||||
# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
|
||||
|
||||
# X6$Score_K <- "No Effect"
|
||||
# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
|
||||
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
|
||||
# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
|
||||
|
||||
# Express NA Z_lm scores as the 0.001 placeholder in every screen table, for
# both K and L. Z_Shift columns are intentionally left as they are.
# The column vector is indexed directly: `df[rows, ]$col <- value` errors
# when no row is selected, i.e. when a column contains no NA at all.
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001

X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001

X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001

X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001
|
||||
|
||||
|
||||
#express the na data as 0.001 in X6
|
||||
# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
|
||||
|
||||
# Per screen: rank the L and K Z scores, sort the rows by OrfRep (ascending),
# then tag every column name with the screen suffix so the tables can be
# placed side by side. (The X6 screen is currently disabled.)

# Screen X1
X1$Rank_L <- rank(X1$Z_lm_L)
X1$Rank_K <- rank(X1$Z_lm_K)
X1 <- X1[order(X1$OrfRep), ]
colnames(X1) <- paste0(colnames(X1), "_X1")

# Screen X2
X2$Rank_L <- rank(X2$Z_lm_L)
X2$Rank_K <- rank(X2$Z_lm_K)
X2 <- X2[order(X2$OrfRep), ]
colnames(X2) <- paste0(colnames(X2), "_X2")

# Screen X4
X4$Rank_L <- rank(X4$Z_lm_L)
X4$Rank_K <- rank(X4$Z_lm_K)
X4 <- X4[order(X4$OrfRep), ]
colnames(X4) <- paste0(colnames(X4), "_X4")

# Screen X5
X5$Rank_L <- rank(X5$Z_lm_L)
X5$Rank_K <- rank(X5$Z_lm_K)
X5 <- X5[order(X5$OrfRep), ]
colnames(X5) <- paste0(colnames(X5), "_X5")
|
||||
#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
|
||||
# colnames(X1)[1] <- "OrfRep"
|
||||
# colnames(X2)[1] <- "OrfRep"
|
||||
# colnames(X4)[1] <- "OrfRep"
|
||||
# colnames(X5)[1] <- "OrfRep"
|
||||
# colnames(X6)[1] <- "OrfRep"
|
||||
|
||||
# Combine the per-experiment tables side by side. cbind() pairs rows purely
# by position, so the result is only meaningful when every table carries the
# same OrfRep values in the same (sorted) order. Warn when that does not hold
# instead of silently mixing scores from different ORFs on one row.
orf_keys <- list(X1$OrfRep_X1, X2$OrfRep_X2, X4$OrfRep_X4, X5$OrfRep_X5)
if (!all(vapply(orf_keys[-1], identical, logical(1), orf_keys[[1]]))) {
  warning("OrfRep columns differ between the input tables; ",
          "rows combined by cbind() may be misaligned", call. = FALSE)
}
X <- cbind(X1, X2, X4, X5)
|
||||
#print(dim(X))
|
||||
#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
|
||||
#print(X[2700,])
|
||||
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
|
||||
#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
|
||||
# Derive the ORF key from the X1 OrfRep column by removing the "_1".."_4"
# tags, one pattern after the other. X_heatmap$ORF is later matched against
# the ORFs annotated to each GO term.
X$ORF <- X$OrfRep_X1

#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
for (rep_tag in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(rep_tag, "", x = X$ORF)
}
|
||||
|
||||
# Fall back to the OrfRep identifier wherever a gene name is blank.
#
# The columns are indexed directly rather than through `X[rows, ]$col <- ...`:
# that form raises an error when no row is blank (which is why it used to be
# wrapped in try()), and it also raises - and therefore fills nothing - when
# the gene column contains NA, because NA is not allowed in the row subscript
# of a data-frame assignment. NA gene names are left untouched here.
for (tag in c("X1", "X2", "X4", "X5")) {
  gene_col <- paste0("Gene_", tag)
  orf_col <- paste0("OrfRep_", tag)
  if (!all(c(gene_col, orf_col) %in% colnames(X))) {
    # Same outcome as the old try(): a table without these columns is skipped.
    next
  }
  unnamed <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
  X[[gene_col]][unnamed] <- X[[orf_col]][unnamed]
}
|
||||
#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
|
||||
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
|
||||
#
|
||||
# #express the na data as 0.001
|
||||
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
|
||||
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
|
||||
#
|
||||
# X$Overlap <- "No Effect"
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
#
|
||||
# X$Overlap_K <- "No Effect"
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
|
||||
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
|
||||
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
|
||||
#
|
||||
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
|
||||
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
|
||||
|
||||
# Keep only what the heatmaps need: the ORF key, the X1 gene label and the
# Z_Shift / Z_lm columns (K and L) of the four experiments. Columns stay in
# the order they have in X.
heatmap_columns <- c(
  "ORF", "Gene_X1",
  "Z_Shift_K_X1", "Z_lm_K_X1", "Z_Shift_K_X2", "Z_lm_K_X2",
  "Z_Shift_K_X4", "Z_lm_K_X4", "Z_Shift_K_X5", "Z_lm_K_X5",
  "Z_Shift_L_X1", "Z_lm_L_X1", "Z_Shift_L_X2", "Z_lm_L_X2",
  "Z_Shift_L_X4", "Z_lm_L_X4", "Z_Shift_L_X5", "Z_lm_L_X5"
)
X_heatmap <- X[colnames(X) %in% heatmap_columns]
#print(colnames(X_heatmap))
#break()

# Positional reorder of the 18 selected columns: ORF (18) and the gene label
# (1) first, then columns 3-4 of every experiment's four-column score group,
# then columns 1-2 of every group.
# NOTE(review): which pair is K and which is L depends on the column order of
# the input files - confirm before relying on the layout.
group_starts <- c(2, 6, 10, 14)
second_pairs <- as.vector(rbind(group_starts + 2, group_starts + 3))
first_pairs <- as.vector(rbind(group_starts, group_starts + 1))
X_heatmap <- X_heatmap[, c(18, 1, second_pairs, first_pairs)]

# Swap the internal X1/X2/X4/X5 tags for the experiment names supplied by the
# caller (X6 / Name5 is not used in this version).
table_tags <- c("X1", "X2", "X4", "X5")
table_names <- c(Name1, Name2, Name3, Name4)
for (k in seq_along(table_tags)) {
  colnames(X_heatmap) <- gsub(pattern = table_tags[k],
                              replacement = table_names[k],
                              colnames(X_heatmap))
}

colnames(X_heatmap)[2] <- "Gene"

# Colour-scale break points: steps of 2 from -12 to 12 with a single wide
# central bin spanning -2..2.
colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))
|
||||
|
||||
# Page height and row-label size for a GO-term PDF, chosen from the number of
# ORFs annotated to the parent term. Bands are open on the left and closed on
# the right, e.g. (30, 60] -> height 10.
#
# The original if-chain used ranges that were closed on both ends, so a size
# sitting exactly on a bound (30, 60, 100, 200, 500, 1000) matched two
# adjacent branches: the same file was rendered twice and the lower band's
# output overwrote the first. Selecting the lower band directly produces the
# same final file with a single render.
go_heatmap_layout <- function(parent_size) {
  upper_bounds <- c(30, 60, 100, 200, 500, 1000, 2000)
  band <- findInterval(parent_size, upper_bounds, left.open = TRUE) + 1L
  list(
    height = c(7, 10, 15, 20, 25, 30, 35, 45)[band],
    cex_row = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)[band]
  )
}

# Draw one heatmap page for the rows of `term_rows` (columns 3.. are the
# scores, column `Gene` supplies the row labels). heatmap.2() is wrapped in
# try() on purpose: a term whose matrix cannot be clustered or drawn must not
# abort the remaining pages.
draw_go_heatmap <- function(term_rows, title, cex_row, dendrogram, breaks) {
  score_matrix <- as.matrix(term_rows[, 3:dim(term_rows)[2]])
  try(heatmap.2(x = score_matrix,
                Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                dendrogram = dendrogram, cexCol = 0.7, cexRow = cex_row,
                scale = "none",
                breaks = breaks, symbreaks = FALSE, colsep = c(2, 4, 6),
                sepcolor = "white", offsetCol = 0.1,
                ylab = "Gene",
                cellnote = round(score_matrix, digits = 0), notecex = 0.5,
                key = TRUE,
                keysize = 0.5, trace = "none", density.info = c("none"),
                margins = c(10, 8),
                na.color = "red", col = brewer.pal(11, "PuOr"),
                main = title,
                #ColSideColors=ev_repeat,
                labRow = as.character(term_rows$Gene)))
}

# Write one multi-page PDF for a parent GO term: one page per term in
# `go_terms` that has enough matching rows in `heatmap_data`.
#
# pdf_path:     output file.
# go_terms:     character vector of GO ids (the parent and its descendants).
# parent_size:  number of ORFs annotated to the parent term (>= 2).
# heatmap_data: data frame with ORF, Gene and the score columns.
# term_table:   GO term table with numeric GO_ID and GO_Term columns.
# go_to_orfs:   named list mapping GO id -> annotated ORFs.
# breaks:       colour break points passed to heatmap.2().
write_go_term_pdf <- function(pdf_path, go_terms, parent_size, heatmap_data,
                              term_table, go_to_orfs, breaks) {
  layout <- go_heatmap_layout(parent_size)
  pdf(file = pdf_path, width = 12, height = layout$height, onefile = TRUE)
  # Close the device even if something below fails unexpectedly.
  on.exit(dev.off(), add = TRUE)

  for (go_term in go_terms) {
    term_num <- as.integer(str_split_fixed(as.character(go_term), "\\:", 2)[, 2])
    term_name <- as.character(term_table[term_table$GO_ID == term_num, ]$GO_Term)
    term_orfs <- as.vector(go_to_orfs[go_term][[1]])
    term_rows <- heatmap_data[heatmap_data$ORF %in% term_orfs, ]
    n_rows <- dim(term_rows)[1]

    if (parent_size == 2) {
      # Two-ORF parents: never draw a dendrogram.
      if (n_rows > 0) {
        draw_go_heatmap(term_rows, term_name, layout$cex_row, "none", breaks)
      }
    } else if (n_rows > 2) {
      draw_go_heatmap(term_rows, term_name, layout$cex_row, "row", breaks)
    } else if (parent_size <= 30 && n_rows > 0) {
      # Only the smallest size band also plots terms with one or two rows.
      draw_go_heatmap(term_rows, term_name, layout$cex_row, "none", breaks)
    }
  }
  invisible(pdf_path)
}

# One PDF per parent GO term listed in X3 (column 1: GO id, column 2: label
# used as the file name).
for (s in seq_len(nrow(X3))) {
  GO_ID_Arg_loop <- as.character(X3[s, 1])
  GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)

  # Only make plots when the parent term expands to at most 100 terms.
  if (length(GOTerm_parent) > 100) {
    next
  }

  # Parents with fewer than two annotated ORFs are skipped.
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  if (Parent_Size < 2) {
    next
  }

  write_go_term_pdf(
    pdf_path = paste0(outputpath, X3[s, 2], ".pdf"),
    go_terms = GOTerm_parent,
    parent_size = Parent_Size,
    heatmap_data = X_heatmap,
    term_table = Terms,
    go_to_orfs = GO2ALLORFs,
    breaks = colormapbreaks
  )
}
|
||||
@@ -0,0 +1,754 @@
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library(gplots)
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
# Command-line interface - 14 positional arguments:
#    1  results table (CSV) for dataset 1
#    2  display name for dataset 1
#    3  results table (CSV) for dataset 2
#    4  display name for dataset 2
#    5  results table (CSV) for dataset 3
#    6  display name for dataset 3
#    7  results table (CSV) for dataset 4
#    8  display name for dataset 4
#    9  results table (CSV) for dataset 5
#   10  display name for dataset 5
#   11  gene ontology file
#       (www.geneontology.org/ontology/gene_ontology_edit.obo)
#   12  go_terms.tab file
#   13  CSV listing the parent GO terms to plot: numeric GO id in column 1
#       (e.g. 6325 for chromatin organization, GO:0006325) and a label in
#       column 2. Root terms: biological process GO:0008150, molecular
#       function GO:0003674, cellular component GO:0005575.
#   14  output directory (created when it does not exist)
Args <- commandArgs(TRUE)

# Fail early with a clear message. Without this check a missing 14th argument
# is NA, which ends up creating a directory literally named "NA".
if (length(Args) < 14) {
  stop("Expected 14 arguments (5 input/name pairs, ontology .obo file, ",
       "go_terms.tab, GO term list, output directory) but received ",
       length(Args), call. = FALSE)
}

input_file1 <- Args[1]
Name1 <- Args[2]

input_file2 <- Args[3]
Name2 <- Args[4]

input_file3 <- Args[5]
Name3 <- Args[6]

input_file4 <- Args[7]
Name4 <- Args[8]

input_file5 <- Args[9]
Name5 <- Args[10]

ontology_obo_input <- Args[11]

GOtermstab_file <- Args[12]

GO_ID_Arg <- Args[13]

subDir <- Args[14]

# Create the results directory (including missing parents) on first use.
if (!file.exists(subDir)) {
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output file names are built by pasting directly onto this path.
outputpath <- Args[14]
|
||||
|
||||
|
||||
|
||||
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
|
||||
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
|
||||
|
||||
|
||||
# if (file.exists(outputpath_X1_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X1_rank))
|
||||
# }
|
||||
#
|
||||
# if (file.exists(outputpath_X2_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X2_rank))
|
||||
# }
|
||||
#
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# Publication-style ggplot2 theme: theme_foundation() plus a bold centred
# title, bold axis titles, black axis lines, a light major grid, no minor
# grid, and a compact horizontal legend underneath the plot.
#
# base_size:   base font size handed to theme_foundation().
# base_family: base font family handed to theme_foundation().
# Returns a ggplot2 theme object.
theme_Publication <- function(base_size=14, base_family="sans") {
  light_grey <- "#f0f0f0"

  publication_tweaks <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = light_grey),
    panel.grid.minor = element_blank(),
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = light_grey, fill = light_grey),
    strip.text = element_text(face = "bold")
  )

  theme_foundation(base_size = base_size, base_family = base_family) +
    publication_tweaks
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
#
# manual_pal() is referenced through the scales namespace so the function no
# longer attaches a package as a side effect of being called, and works
# whether or not scales is on the search path.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
#
# manual_pal() lives in the scales package, which this script never attaches
# at top level; qualifying the call keeps the function from failing with
# "could not find function" when scales is not on the search path.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
|
||||
# Variant of theme_Publication() with the legend placed vertically on the
# right-hand side and larger legend keys (0.5 cm instead of 0.2 cm).
#
# This used to be a line-for-line copy of theme_Publication() differing only
# in the three legend settings below; it now layers those settings on top of
# theme_Publication() so the two themes cannot drift apart.
#
# base_size:   base font size handed on to theme_Publication().
# base_family: base font family handed on to theme_Publication().
# Returns a ggplot2 theme object.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  theme_Publication(base_size = base_size, base_family = base_family) +
    theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.key.size = unit(0.5, "cm")
    )
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
#
# manual_pal() lives in the scales package, which this script never attaches
# at top level; qualifying the call keeps the function from failing with
# "could not find function" when scales is not on the search path.
scale_fill_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("fill", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
# `...` is forwarded to discrete_scale().
#
# manual_pal() lives in the scales package, which this script never attaches
# at top level; qualifying the call keeps the function from failing with
# "could not find function" when scales is not on the search path.
scale_colour_Publication <- function(...){
  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506",
                           "#a6cee3","#fb9a99","#984ea3","#ffff33")
  discrete_scale("colour", "Publication",
                 scales::manual_pal(values = publication_colours), ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# All CSV inputs are read the same way: header row present, strings kept as
# character vectors.
read_input_csv <- function(path) {
  read.csv(file = path, stringsAsFactors = FALSE, header = TRUE)
}

# Score tables. Note the numbering: input files 3, 4 and 5 become X4, X5 and
# X6, because X3 is reserved for the GO term list.
X1 <- read_input_csv(input_file1)
#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
X2 <- read_input_csv(input_file2)
#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)
X4 <- read_input_csv(input_file3)
X5 <- read_input_csv(input_file4)
X6 <- read_input_csv(input_file5)

# GO term list: column 1 is turned into a zero-padded "GO:0006325"-style id,
# column 2 has spaces and slashes replaced by underscores.
X3 <- read_input_csv(GO_ID_Arg)
X3[, 1] <- paste0("GO:", formatC(X3[, 1], width = 7, flag = "0"))
for (unsafe_char in c(" ", "/")) {
  X3[, 2] <- gsub(pattern = unsafe_char, replacement = "_", x = X3[, 2])
}
|
||||
|
||||
#Name1 <- "DOXO_HLD"
|
||||
#Name2 <- "DOXO_HLEG"
|
||||
|
||||
|
||||
#www.geneontology.org/ontology/gene_ontology_edit.obo file
|
||||
# Parse the .obo ontology file, following only "is_a" relationships and
# extracting the minimal tag set.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# Named list keyed by GO id; each element holds all ORFs associated with
# that GO term (from org.Sc.sgdGO2ALLORFS).
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
#Terms is the GO term list
|
||||
# GO term table from go_terms.tab: no header row, quoting disabled, four
# columns named explicitly.
go_term_columns <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
|
||||
# Label a vector of Z scores.
#
# z:             numeric Z scores (may contain NA).
# n:             number of labels to produce (rows of the table).
# high_label:    label for z >= 2.
# low_label:     label for z <= -2.
# Returns a character vector of length n: "No Growth" where z is NA, the
# high/low label beyond the +/-2 thresholds, "No Effect" otherwise.
label_z_scores <- function(z, n, high_label, low_label) {
  labels <- rep("No Effect", n)
  labels[is.na(z)] <- "No Growth"
  labels[!is.na(z) & z >= 2] <- high_label
  labels[!is.na(z) & z <= -2] <- low_label
  labels
}

# Add the ORF, Score_L and Score_K columns to one score table.
#
# ORF is OrfRep with the "_1".."_4" tags removed. The labels are built on
# plain vectors instead of `try(df[rows, ]$col <- ...)`: that form raises an
# error whenever no row matches, which try() was only there to hide.
#
# Note the opposite sign conventions: a high Z_lm_L is a "Deletion Enhancer",
# whereas a high Z_lm_K is a "Deletion Suppressor".
annotate_scores <- function(scores) {
  scores$ORF <- scores$OrfRep
  for (rep_tag in c("_1", "_2", "_3", "_4")) {
    scores$ORF <- gsub(rep_tag, "", x = scores$ORF)
  }

  n_rows <- nrow(scores)
  scores$Score_L <- label_z_scores(scores$Z_lm_L, n_rows,
                                   high_label = "Deletion Enhancer",
                                   low_label = "Deletion Suppressor")
  scores$Score_K <- label_z_scores(scores$Z_lm_K, n_rows,
                                   high_label = "Deletion Suppressor",
                                   low_label = "Deletion Enhancer")
  scores
}

X1 <- annotate_scores(X1)
X2 <- annotate_scores(X2)
X4 <- annotate_scores(X4)
X5 <- annotate_scores(X5)
X6 <- annotate_scores(X6)
|
||||
|
||||
# Express missing interaction Z scores as the sentinel 0.001 in every screen
# table, for both L and K.
#
# The replacement is done on the column vector (df$col[mask] <- value), which
# is a no-op when nothing is NA. The previous form, df[mask, ]$col <- value,
# stops the script with "replacement has 1 row, data has 0" as soon as one of
# the columns contains no NA at all.

#express the na data as 0.001 in X1 for K and L
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001

#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001

#express the na data as 0.001 in X2
X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001

#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001

#express the na data as 0.001 in X4
X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001

#express the na data as 0.001 in X5
X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001

#express the na data as 0.001 in X6
X6$Z_lm_L[is.na(X6$Z_lm_L)] <- 0.001
X6$Z_lm_K[is.na(X6$Z_lm_K)] <- 0.001
|
||||
|
||||
|
||||
|
||||
# Per screen table: add the rank of each interaction Z score (Rank_L, then
# Rank_K), sort the rows by OrfRep in increasing order, and finally tag every
# column name with the screen suffix so the tables can be bound side by side.
# The three steps of one table are independent of the other tables.

# Screen X1
X1$Rank_L <- rank(X1$Z_lm_L)
X1$Rank_K <- rank(X1$Z_lm_K)
X1 <- X1[order(X1$OrfRep), ]
names(X1) <- paste0(names(X1), "_X1")

# Screen X2
X2$Rank_L <- rank(X2$Z_lm_L)
X2$Rank_K <- rank(X2$Z_lm_K)
X2 <- X2[order(X2$OrfRep), ]
names(X2) <- paste0(names(X2), "_X2")

# Screen X4
X4$Rank_L <- rank(X4$Z_lm_L)
X4$Rank_K <- rank(X4$Z_lm_K)
X4 <- X4[order(X4$OrfRep), ]
names(X4) <- paste0(names(X4), "_X4")

# Screen X5
X5$Rank_L <- rank(X5$Z_lm_L)
X5$Rank_K <- rank(X5$Z_lm_K)
X5 <- X5[order(X5$OrfRep), ]
names(X5) <- paste0(names(X5), "_X5")

# Screen X6
X6$Rank_L <- rank(X6$Z_lm_L)
X6$Rank_K <- rank(X6$Z_lm_K)
X6 <- X6[order(X6$OrfRep), ]
names(X6) <- paste0(names(X6), "_X6")
|
||||
# colnames(X1)[1] <- "OrfRep"
|
||||
# colnames(X2)[1] <- "OrfRep"
|
||||
# colnames(X4)[1] <- "OrfRep"
|
||||
# colnames(X5)[1] <- "OrfRep"
|
||||
# colnames(X6)[1] <- "OrfRep"
|
||||
|
||||
# Combine the five screen tables side by side.
# cbind() pairs rows purely by position, so the result is only meaningful
# when every table carries the same OrfRep values in the same order. Warn
# (without stopping) when that is not the case.
orf_keys <- list(X1$OrfRep_X1, X2$OrfRep_X2, X4$OrfRep_X4, X5$OrfRep_X5, X6$OrfRep_X6)
keys_match <- vapply(
  orf_keys[-1],
  function(key) identical(as.character(key), as.character(orf_keys[[1]])),
  logical(1)
)
if (!all(keys_match)) {
  warning("OrfRep values differ between the screen tables; rows of X may be misaligned",
          call. = FALSE)
}
X <- cbind(X1, X2, X4, X5, X6)
# Disabled alternative that joins on the key instead of on row position:
#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))

# Plain ORF for the combined table: OrfRep of X1 with every "_1" .. "_4"
# removed, in that order.
X$ORF <- X$OrfRep_X1
for (suffix in c("_1", "_2", "_3", "_4")) {
  X$ORF <- gsub(suffix, "", x = X$ORF)
}

# Where a screen has an empty gene name, fall back to that screen's OrfRep.
# The mask excludes NA explicitly: with the old X[X$Gene == "", ]$Gene <- ...
# form a single NA gene made the assignment fail, and the surrounding try()
# then silently skipped the fill for the whole column.
for (tag in c("X1", "X2", "X4", "X5", "X6")) {
  gene_col <- paste0("Gene_", tag)
  orf_col <- paste0("OrfRep_", tag)
  if (!all(c(gene_col, orf_col) %in% colnames(X))) {
    next
  }
  blank <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
  X[[gene_col]][blank] <- X[[orf_col]][blank]
}
|
||||
#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
|
||||
#
|
||||
# #express the na data as 0.001
|
||||
# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
|
||||
# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
|
||||
#
|
||||
# X$Overlap <- "No Effect"
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
#
|
||||
# X$Overlap_K <- "No Effect"
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
|
||||
# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
|
||||
|
||||
# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
|
||||
# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
|
||||
#
|
||||
# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
|
||||
# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
|
||||
|
||||
# Build the table that feeds the heatmaps: ORF, the gene name of screen X1,
# and the Z_Shift / Z_lm columns (K and L) of all five screens.
heatmap_tags <- c("X1", "X2", "X4", "X5", "X6")
heatmap_cols <- c(
  "ORF", "Gene_X1",
  paste0("Z_Shift_K_", heatmap_tags), paste0("Z_lm_K_", heatmap_tags),
  paste0("Z_Shift_L_", heatmap_tags), paste0("Z_lm_L_", heatmap_tags)
)
# Logical column mask: the selected columns keep the order they have in X.
X_heatmap <- X[colnames(X) %in% heatmap_cols]

# The reorder below is positional and needs exactly 22 columns.
if (ncol(X_heatmap) != 22) {
  stop("expected 22 heatmap columns (ORF, Gene_X1 and 20 Z-score columns), found ",
       ncol(X_heatmap), call. = FALSE)
}
# Move column 22 to the front, then column 1, then the 3rd/4th selected
# column of each screen, then the 1st/2nd of each screen.
# NOTE(review): which of these are the K and which the L columns depends on
# the column order of the input files -- confirm against their headers.
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]

# Swap the screen tags for the display names. Each substitution is applied to
# the original tagged name, so a display name that itself contains "X2",
# "X4", ... is not rewritten again by a later substitution.
screen_labels <- c(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4, X6 = Name5)
tagged_names <- colnames(X_heatmap)
display_names <- tagged_names
for (tag in names(screen_labels)) {
  has_tag <- grepl(tag, tagged_names)
  display_names[has_tag] <- gsub(pattern = tag, replacement = screen_labels[[tag]],
                                 tagged_names[has_tag])
}
colnames(X_heatmap) <- display_names

# Second column is used as the row label source further down
# (presumably the former Gene_X1 column).
colnames(X_heatmap)[2] <- "Gene"
|
||||
|
||||
|
||||
# Colour-bin edges for the heatmaps: 12 edges give 11 bins, one per colour of
# brewer.pal(11, "PuOr"); all values between -2 and 2 share the middle bin.
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)

# PDF layout by the number of ORFs annotated to the parent GO term.
# Each upper bound is inclusive. The former chain of if-blocks had
# overlapping bounds (e.g. 500 matched both 200-500 and 500-1000), which
# rendered the PDF twice with the later, smaller layout overwriting the first;
# the tiers below reproduce that final result in a single pass.
parent_size_upper <- c(30, 60, 100, 200, 500, 1000, 2000, Inf)
pdf_height_by_tier <- c(7, 10, 15, 20, 25, 30, 35, 45)
row_cex_by_tier <- c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)
# Only parents with at most this many ORFs also get a (dendrogram-free) page
# for child terms that have just one or two genes in X_heatmap.
small_term_page_max <- 60

# Draw one heatmap page for the genes of a single GO term.
#   term_rows   rows of X_heatmap annotated to the term
#   term_title  page title
#   dendrogram  "row" or "none", passed to heatmap.2()
#   cex_row     row label size
# Drawing is best effort: a heatmap.2() error is reported by try() and the
# page is skipped.
draw_term_heatmap <- function(term_rows, term_title, dendrogram, cex_row) {
  # columns 1 and 2 are ORF and Gene; everything after is plotted
  z_values <- as.matrix(term_rows[, 3:dim(term_rows)[2]])
  try(heatmap.2(x = z_values,
                Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
                dendrogram = dendrogram, cexCol = 0.7, cexRow = cex_row, scale = "none",
                breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2,4,6),
                sepcolor = "white", offsetCol = 0.1,
                ylab = "Gene",
                cellnote = round(z_values, digits = 0), notecex = 0.5, key = TRUE,
                keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10,8),
                na.color = "red", col = brewer.pal(11, "PuOr"),
                main = term_title,
                labRow = as.character(term_rows$Gene)))
}

# Write one multi-page PDF for a parent GO term: one page per term returned
# by get_descendants() that has enough genes in X_heatmap.
#   parent_go_id  GO identifier of the parent term, e.g. "GO:0006325"
#   pdf_stem      file name without extension; the file is written to
#                 paste0(outputpath, pdf_stem, ".pdf"), so outputpath is
#                 expected to end with a path separator
# Returns NULL invisibly. Nothing is written when the parent has more than
# 100 terms or fewer than 2 annotated ORFs.
plot_go_parent <- function(parent_go_id, pdf_stem) {
  child_terms <- get_descendants(Ontology, roots = parent_go_id)
  # only make plots if the parent term expands to at most 100 terms
  if (length(child_terms) > 100) {
    return(invisible(NULL))
  }

  parent_size <- length(as.vector(GO2ALLORFs[parent_go_id][[1]]))
  if (parent_size < 2) {
    return(invisible(NULL))
  }
  tier <- which(parent_size <= parent_size_upper)[1]
  allow_small_terms <- parent_size <= small_term_page_max

  pdf(file = paste0(outputpath, pdf_stem, ".pdf"), width = 12,
      height = pdf_height_by_tier[tier], onefile = TRUE)
  # close the device even if something below fails
  on.exit(dev.off(), add = TRUE)

  for (go_term in child_terms) {
    # Terms$GO_ID holds the numeric part of the identifier ("GO:0006325" -> 6325)
    go_term_num <- as.integer(str_split_fixed(as.character(go_term), "\\:", 2)[, 2])
    go_term_name <- as.character(Terms[Terms$GO_ID == go_term_num, ]$GO_Term)

    term_orfs <- as.vector(GO2ALLORFs[go_term][[1]])
    term_rows <- X_heatmap[X_heatmap$ORF %in% term_orfs, ]
    n_genes <- dim(term_rows)[1]

    if (n_genes > 2) {
      draw_term_heatmap(term_rows, go_term_name, "row", row_cex_by_tier[tier])
    } else if (allow_small_terms && n_genes > 0) {
      draw_term_heatmap(term_rows, go_term_name, "none", row_cex_by_tier[tier])
    }
  }
  invisible(NULL)
}

# One PDF per row of X3: column 1 is the GO identifier, column 2 the file stem.
for (s in seq_len(nrow(X3))) {
  plot_go_parent(as.character(X3[s, 1]), X3[s, 2])
}
|
||||
@@ -0,0 +1,622 @@
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library(gplots)
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
# Command-line arguments (all eight are required):
#   1  first results CSV; the OrfRep, Z_lm_L and Z_lm_K columns are used below
#   2  display name for the first data set
#   3  second results CSV, same layout as the first
#   4  display name for the second data set
#   5  the www.geneontology.org/ontology/gene_ontology_edit.obo file
#   6  the go_terms.tab file
#   7  CSV file of GO terms: column 1 is the numeric GO id (chromatin
#      organization, GO:0006325, is 6325), column 2 the term name. It is read
#      with read.csv() further down, so it must be a file, not a single GO:ID.
#   8  directory to put the results into (created if needed)
Args <- commandArgs(TRUE)
if (length(Args) < 8) {
  stop("expected 8 arguments: <results1.csv> <name1> <results2.csv> <name2> ",
       "<gene_ontology_edit.obo> <go_terms.tab> <go_ids.csv> <output_dir>",
       call. = FALSE)
}

input_file1 <- Args[1]
Name1 <- Args[2]
input_file2 <- Args[3]
Name2 <- Args[4]
ontology_obo_input <- Args[5]
GOtermstab_file <- Args[6]
GO_ID_Arg <- Args[7]
subDir <- Args[8]

# Create the output directory (and any missing parents) on first use.
if (!file.exists(subDir)) {
  dir.create(file.path(subDir), recursive = TRUE)
}

# Output path is the eighth argument.
outputpath <- subDir
|
||||
|
||||
|
||||
|
||||
#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
|
||||
#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
|
||||
|
||||
|
||||
# if (file.exists(outputpath_X1_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X1_rank))
|
||||
# }
|
||||
#
|
||||
# if (file.exists(outputpath_X2_rank)){
|
||||
# #outputpath <- subDir
|
||||
# } else {
|
||||
# dir.create(file.path(outputpath_X2_rank))
|
||||
# }
|
||||
#
|
||||
|
||||
|
||||
|
||||
#theme elements for plots
|
||||
# ggplot2 theme for publication figures with the legend laid out
# horizontally below the plot.
#   base_size    base font size handed to theme_foundation()
#   base_family  base font family handed to theme_foundation()
# Returns theme_foundation() with the overrides below added on top.
theme_Publication <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  overrides <- theme(
    # titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # backgrounds and border without an outline
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # light major grid, no minor grid
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # legend: bottom, horizontal, compact keys
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    # facet strips
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  foundation + overrides
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
#   ...  forwarded to discrete_scale()
# manual_pal() is called through the scales namespace, so the function no
# longer depends on library(scales) having been run inside it. Note that the
# same function is defined again further down in this file, and that later
# definition is the one in effect once the script has been sourced.
scale_fill_Publication <- function(...){
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
#   ...  forwarded to discrete_scale()
# manual_pal() lives in the scales package, which is never attached at the
# top of this script; calling it through the namespace avoids a
# "could not find function" error.
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
|
||||
# ggplot2 theme for publication figures with the legend stacked vertically
# to the right of the plot.
#   base_size    base font size handed to theme_foundation()
#   base_family  base font family handed to theme_foundation()
# Returns theme_foundation() with the overrides below added on top.
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
  foundation <- theme_foundation(base_size = base_size, base_family = base_family)

  text_elements <- theme(
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    legend.title = element_text(face = "italic"),
    strip.text = element_text(face = "bold")
  )

  panel_elements <- theme(
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0")
  )

  # legend: right-hand side, vertical, 0.5 cm keys
  legend_elements <- theme(
    legend.key = element_rect(colour = NA),
    legend.position = "right",
    legend.direction = "vertical",
    legend.key.size = unit(0.5, "cm"),
    legend.spacing = unit(0, "cm")
  )

  foundation + text_elements + panel_elements + legend_elements
}
|
||||
|
||||
# Discrete fill scale using the nine-colour "Publication" palette.
#   ...  forwarded to discrete_scale()
# This re-definition replaces the identically named function defined earlier
# in the file. manual_pal() is called through the scales namespace because
# scales is not attached at the top of this script.
scale_fill_Publication <- function(...){
  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
# Discrete colour scale using the nine-colour "Publication" palette.
#   ...  forwarded to discrete_scale()
# This re-definition replaces the identically named function defined earlier
# in the file. manual_pal() is called through the scales namespace because
# scales is not attached at the top of this script.
scale_colour_Publication <- function(...){
  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
|
||||
|
||||
|
||||
|
||||
# Load the two result tables; text columns stay character.
X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)

X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)

# Load the GO term table named by the seventh argument.
X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)

# Column 1: zero-pad the id to seven characters and prefix it with "GO:".
go_id_padded <- formatC(X3[, 1], width = 7, flag = "0")
X3[, 1] <- paste0("GO:", go_id_padded)

# Column 2: spaces and slashes become underscores.
X3[, 2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[, 2])
|
||||
|
||||
#Name1 <- "DOXO_HLD"
|
||||
#Name2 <- "DOXO_HLEG"
|
||||
|
||||
|
||||
#www.geneontology.org/ontology/gene_ontology_edit.obo file
|
||||
# Parse the .obo ontology file given as the fifth argument, following only
# "is_a" relationships and extracting the minimal tag set.
Ontology <- get_ontology(
  file = ontology_obo_input,
  propagate_relationships = "is_a",
  extract_tags = "minimal"
)

# GO id -> ORFs lookup, taken from org.Sc.sgdGO2ALLORFS as a plain list.
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
#Terms is the GO term list
|
||||
# GO term list: header-less, tab-delimited file with quoting disabled,
# read into four named columns.
go_term_columns <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(
  file = GOtermstab_file,
  header = FALSE,
  quote = "",
  col.names = go_term_columns
)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
|
||||
X1$ORF <- X1$OrfRep
|
||||
X1$ORF <- gsub("_1","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_2","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_3","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_4","",x=X1$ORF)
|
||||
|
||||
#Label every X1 row by its L and K interaction z-scores.
#The column vector is indexed directly (X1$col[mask]) instead of X1[mask,]$col,
#because the latter stops with "replacement has 1 row, data has 0" whenever the
#mask selects no rows (e.g. a screen without any NA z-score).
#For L a z-score >= 2 is labelled Deletion Enhancer and <= -2 Deletion Suppressor;
#for K the two labels are swapped.
X1$Score_L <- "No Effect"
X1$Score_L[is.na(X1$Z_lm_L)] <- "No Growth"
X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2] <- "Deletion Enhancer"
X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2] <- "Deletion Suppressor"

X1$Score_K <- "No Effect"
X1$Score_K[is.na(X1$Z_lm_K)] <- "No Growth"
X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2] <- "Deletion Suppressor"
X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2] <- "Deletion Enhancer"
|
||||
|
||||
|
||||
|
||||
|
||||
X2$ORF <- X2$OrfRep
|
||||
X2$ORF <- gsub("_1","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_2","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_3","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_4","",x=X2$ORF)
|
||||
|
||||
#Label every X2 row by its L and K interaction z-scores.
#The column vector is indexed directly (X2$col[mask]) instead of X2[mask,]$col,
#because the latter stops with "replacement has 1 row, data has 0" whenever the
#mask selects no rows (e.g. a screen without any NA z-score).
#For L a z-score >= 2 is labelled Deletion Enhancer and <= -2 Deletion Suppressor;
#for K the two labels are swapped.
X2$Score_L <- "No Effect"
X2$Score_L[is.na(X2$Z_lm_L)] <- "No Growth"
X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2] <- "Deletion Enhancer"
X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2] <- "Deletion Suppressor"

X2$Score_K <- "No Effect"
X2$Score_K[is.na(X2$Z_lm_K)] <- "No Growth"
X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2] <- "Deletion Suppressor"
X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2] <- "Deletion Enhancer"
|
||||
|
||||
#express the na data as 0.001 in X1 for K and L
|
||||
#replace missing X1 z-scores with the 0.001 placeholder
#(vector indexing is a no-op when nothing is NA; X1[mask,]$col would stop with an error)
X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
|
||||
|
||||
#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
|
||||
#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
|
||||
|
||||
#express the na data as 0.001 in X2
|
||||
#replace missing X2 z-scores with the 0.001 placeholder
#(vector indexing is a no-op when nothing is NA; X2[mask,]$col would stop with an error)
X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
|
||||
|
||||
#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
|
||||
#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
|
||||
|
||||
|
||||
X1$Rank_L <- rank(X1$Z_lm_L)
|
||||
X1$Rank_K <- rank(X1$Z_lm_K)
|
||||
|
||||
X2$Rank_L <- rank(X2$Z_lm_L)
|
||||
X2$Rank_K <- rank(X2$Z_lm_K)
|
||||
|
||||
X <- merge(X1,X2,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2"))
|
||||
X$ORF <- X$OrfRep
|
||||
|
||||
#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
X$ORF <- gsub("_3","",x=X$ORF)
|
||||
X$ORF <- gsub("_4","",x=X$ORF)
|
||||
|
||||
#Use the OrfRep identifier as the gene name wherever the gene name is an empty string.
#which() drops NA comparisons (rows that exist in only one screen after the all=TRUE
#merge); a logical data-frame index containing NA makes the assignment fail, and the
#previous try() wrapper hid that failure so no name was filled in at all.
blank_gene_X1 <- which(X$Gene_X1 == "")
if(length(blank_gene_X1) > 0){
  X$Gene_X1[blank_gene_X1] <- X$OrfRep[blank_gene_X1]
}

blank_gene_X2 <- which(X$Gene_X2 == "")
if(length(blank_gene_X2) > 0){
  X$Gene_X2[blank_gene_X2] <- X$OrfRep[blank_gene_X2]
}
|
||||
|
||||
|
||||
#express the na data as 0.001
|
||||
#X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
|
||||
#X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
|
||||
|
||||
#Classify every ORF by comparing its L interaction z-score in the two screens.
#Statements run in order and later ones overwrite earlier labels, so keep the order.
#which() drops rows whose comparison is NA (ORFs present in only one screen after the
#all=TRUE merge). Indexing the data frame with a logical vector containing NA makes the
#assignment fail, and with try() around it every label was silently skipped.
X$Overlap <- "No Effect"
X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2)] <- "Deletion Enhancer Both"
X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2)] <- "Deletion Suppressor Both"

#"only" labels: the other screen is compared strictly so that a score of exactly 2 (or -2)
#in both screens keeps the "Both" label assigned above
X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 < 2)] <- paste0("Deletion Enhancer ",Name1," only")
X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 > -2)] <- paste0("Deletion Suppressor ",Name1," only")
X$Overlap[which(X$Z_lm_L_X1 < 2 & X$Z_lm_L_X2 >= 2)] <- paste0("Deletion Enhancer ",Name2," only")
X$Overlap[which(X$Z_lm_L_X1 > -2 & X$Z_lm_L_X2 <= -2)] <- paste0("Deletion Suppressor ",Name2," only")

#opposite effects in the two screens
X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2)] <- paste0("Deletion Enhancer ",Name1," Deletion Suppressor ",Name2)
X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2)] <- paste0("Deletion Suppressor ",Name1," Deletion Enhancer ",Name2)

#0.001 is the placeholder written for missing (no growth) z-scores
X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001)] <- paste0("Deletion Enhancer ",Name1," No Growth ",Name2)
X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001)] <- paste0("Deletion Suppressor ",Name1," No Growth ",Name2)
X$Overlap[which(X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001)] <- paste0("Deletion Enhancer ",Name2," No Growth ",Name1)
X$Overlap[which(X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001)] <- paste0("Deletion Suppressor ",Name2," No Growth ",Name1)
|
||||
|
||||
#Classify every ORF by comparing its K interaction z-score in the two screens.
#For K a z-score <= -2 is labelled Deletion Enhancer and >= 2 Deletion Suppressor.
#Statements run in order and later ones overwrite earlier labels, so keep the order.
#which() drops rows whose comparison is NA (ORFs present in only one screen after the
#all=TRUE merge). Indexing the data frame with a logical vector containing NA makes the
#assignment fail, and with try() around it every label was silently skipped.
X$Overlap_K <- "No Effect"
X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2)] <- "Deletion Enhancer Both"
X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2)] <- "Deletion Suppressor Both"

#"only" labels: the other screen is compared strictly so that a score of exactly -2 (or 2)
#in both screens keeps the "Both" label assigned above
X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 > -2)] <- paste0("Deletion Enhancer ",Name1," only")
X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 < 2)] <- paste0("Deletion Suppressor ",Name1," only")
X$Overlap_K[which(X$Z_lm_K_X1 > -2 & X$Z_lm_K_X2 <= -2)] <- paste0("Deletion Enhancer ",Name2," only")
X$Overlap_K[which(X$Z_lm_K_X1 < 2 & X$Z_lm_K_X2 >= 2)] <- paste0("Deletion Suppressor ",Name2," only")

#opposite effects in the two screens
X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2)] <- paste0("Deletion Enhancer ",Name1," Deletion Suppressor ",Name2)
X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2)] <- paste0("Deletion Suppressor ",Name1," Deletion Enhancer ",Name2)

#0.001 is the placeholder written for missing (no growth) z-scores
X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001)] <- paste0("Deletion Enhancer ",Name1," No Growth ",Name2)
X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001)] <- paste0("Deletion Suppressor ",Name1," No Growth ",Name2)
X$Overlap_K[which(X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001)] <- paste0("Deletion Enhancer ",Name2," No Growth ",Name1)
X$Overlap_K[which(X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001)] <- paste0("Deletion Suppressor ",Name2," No Growth ",Name1)
|
||||
|
||||
X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
|
||||
X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
|
||||
|
||||
X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
|
||||
X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
|
||||
|
||||
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" | colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" | colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" | colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" | colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
|
||||
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
|
||||
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
|
||||
|
||||
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
|
||||
|
||||
#Draw one heatmap page for a single GO term on the currently open graphics device.
#  GO_Term          - GO identifier of the term to plot (e.g. "GO:0006325")
#  cexRow           - row label size passed to heatmap.2
#  plot_small_terms - when TRUE, terms with only 1 or 2 library genes are also drawn
#                     (without a dendrogram); when FALSE they are skipped
#Reads the script-level objects Terms, GO2ALLORFs, X_heatmap and colormapbreaks.
#Terms with more than 2 library genes are always drawn with a row dendrogram.
plot_go_term_heatmap <- function(GO_Term, cexRow, plot_small_terms){
  GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
  GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
  All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
  Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
  #columns 1 and 2 are ORF and Gene; the remaining columns are the scores to plot
  X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])

  num_genes <- dim(Genes_Annotated_to_Term)[1]
  if(num_genes > 2){
    dendrogram_type <- "row"
  } else if(plot_small_terms && num_genes > 0){
    dendrogram_type <- "none"
  } else {
    return(invisible(NULL))
  }

  #try() keeps one failing term from aborting the remaining pages of the pdf
  try(heatmap.2(x=X0,
    Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
    dendrogram = dendrogram_type, cexCol = 0.7, cexRow = cexRow, scale = "none",
    breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
    ylab = "Gene",
    cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
    keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
    na.color="red", col=brewer.pal(11,"PuOr"),
    main=GO_Term_Name,
    #ColSideColors=ev_repeat,
    labRow=as.character(Genes_Annotated_to_Term$Gene)))
  invisible(NULL)
}

#Page layout by number of ORFs annotated to the parent term (Parent_Size).
#A parent term uses the first row whose upper bound is >= Parent_Size.
heatmap_layout <- data.frame(
  max_parent_size = c(30, 60, 100, 200, 500, 1000, 2000, Inf),
  pdf_height      = c(7,  10, 15,  20,  25,  30,   35,   45),
  cexRow          = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.6, 0.6, 0.5)
)

#One multi-page pdf per parent GO term listed in X3: one page per term returned by get_descendants()
for(s in seq_len(nrow(X3))){
  GO_ID_Arg_loop <- as.character(X3[s,1])
  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)

  #only make plots if get_descendants() returned 100 or fewer terms for this parent
  if(length(GOTerm_parent) > 100){
    next
  }

  #skip parent terms with fewer than 2 annotated ORFs
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  if(Parent_Size < 2){
    next
  }

  layout_row <- which(Parent_Size <= heatmap_layout$max_parent_size)[1]
  #terms with only 1 or 2 library genes are drawn only for parents with at most 100 ORFs
  plot_small_terms <- Parent_Size <= 100

  pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = heatmap_layout$pdf_height[layout_row], onefile = TRUE)
  for(i in seq_along(GOTerm_parent)){
    plot_go_term_heatmap(GOTerm_parent[i],
      cexRow = heatmap_layout$cexRow[layout_row],
      plot_small_terms = plot_small_terms)
  }
  dev.off()
}
|
||||
239
workflow/.old/apps/r/SSscripts/ScoreAllGOTerms_From_Z_lm_V2.R
Normal file
239
workflow/.old/apps/r/SSscripts/ScoreAllGOTerms_From_Z_lm_V2.R
Normal file
@@ -0,0 +1,239 @@
|
||||
######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
|
||||
|
||||
#Run using Rscript from the command line
|
||||
#Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv go_terms.tab gene_association.sgd output_directory
|
||||
|
||||
#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
|
||||
#in a Q-HTCP screen
|
||||
#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
|
||||
|
||||
#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
|
||||
#requires the go_terms.tab and gene_association.sgd
|
||||
|
||||
#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
|
||||
#Output files will include all GO terms, GO terms curated so that only terms with an average score + or - the SD remains above 2 or below -2
|
||||
#Various other output files will give some statistics on the size of GO terms and the average scores, the SD, etc.
|
||||
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("plyr")
|
||||
|
||||
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
Args <- commandArgs(TRUE)
|
||||
|
||||
#Arg 1 is the ZScores_Interaction.csv
|
||||
input_file <- Args[1]
|
||||
|
||||
#arg 2 is the go_terms.tab
|
||||
#https://downloads.yeastgenome.org/curation/literature/go_terms.tab
|
||||
SGD_Terms_file <- Args[2]
|
||||
|
||||
#arg 3 is the gene_association.sgd
|
||||
#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
|
||||
SGD_features_file <- Args[3]
|
||||
|
||||
#arg 4 is the directory to put the results into (and create that directory if needed)
|
||||
subDir <- Args[4]
|
||||
|
||||
#create the results directory (including missing parent directories) when it does not exist yet
if (!dir.exists(subDir)){
  dir.create(subDir, recursive = TRUE)
}

#define the output path (fourth argument from Rscript)
#output file names are appended with paste(outputpath, name, sep=""), so the
#argument is used exactly as given
outputpath <- subDir
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
|
||||
|
||||
#X is the Z score data
|
||||
#should have ORF in first column, Gene in second column, and then Scores to be averaged in the next columns (3:max)
|
||||
#make sure NA values are included (don't use 0.001) - 0.001 is needed for clustering but this will incorrectly calculate average GTF scores
|
||||
X <- read.csv(file = input_file,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
|
||||
|
||||
if(colnames(X)[1] == "OrfRep"){
|
||||
colnames(X)[1] <- "ORF"
|
||||
}
|
||||
|
||||
#Terms is the GO term list
|
||||
Terms <- read.delim(file = SGD_Terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
#SGD features (not needed)
|
||||
#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/SGD_features.tab",header=FALSE,quote = "",col.names = c("SGD_ID","Feature_Type","Qualifier","ORF","Gene","Alias","Parent_Feature","Secondardy_SGD_ID","Chromosome","Start_Coordinate","Stop_Coordinate","Strand","Genetic_Position","Coordinate_Position","Sequence_Version","Description"))
|
||||
|
||||
#all ORFs associated with GO term
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
Gene_Association <- read.delim(SGD_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
|
||||
#Get the ORF names associated with each gene/GO term
|
||||
Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Get the numeric GO ID for matching
|
||||
Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
#get all unique GO terms
|
||||
GO_Terms <- unique(Gene_Association$GO_ID)
|
||||
|
||||
#create a character vector with just the ColNames of the input file to store the scores for each GO term
|
||||
Col_Names_X <- colnames(X)
|
||||
#create a data_frame with header from input_file
|
||||
GO_Term_Averages <- X[0,]
|
||||
#fill table with NAs same length as number of GO terms
|
||||
GO_Term_Averages[1:length(GO_Terms),] <- NA
|
||||
|
||||
#change the first and second col names to GO_ID and Term
|
||||
colnames(GO_Term_Averages)[1] <- "GO_ID"
|
||||
colnames(GO_Term_Averages)[2] <- "Term"
|
||||
|
||||
#create new columns for Ontology, number genes (used to calculate the avg score), all possible genes in the GO term, and print genes/ORFs used
|
||||
GO_Term_Averages$Ontology <- NA
|
||||
GO_Term_Averages$NumGenes <- NA
|
||||
GO_Term_Averages$AllPossibleGenes <- NA
|
||||
GO_Term_Averages$Genes <- NA
|
||||
GO_Term_Averages$ORFs <- NA
|
||||
|
||||
#create a data.frame for the standard deviation info
|
||||
GO_Term_SD <- X[0,]
|
||||
GO_Term_SD[1:length(GO_Terms),] <- NA
|
||||
|
||||
colnames(GO_Term_SD)[1] <- "GO_ID"
|
||||
colnames(GO_Term_SD)[2] <- "Term"
|
||||
|
||||
#GO_Term_SD$Ontology <- NA
|
||||
#GO_Term_SD$NumGenes <- NA
|
||||
#GO_Term_SD$AllPossibleGenes <- NA
|
||||
|
||||
|
||||
#Loop for each GO term to get an average L and K Z score
|
||||
for(i in 1:length(GO_Terms)){
|
||||
#get the GO_Term
|
||||
ID <- GO_Terms[i]
|
||||
|
||||
#Get data.frame for all genes associated to the GO Term
|
||||
ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]
|
||||
#get a vector of just the gene names
|
||||
ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
|
||||
if(length(unique(ID_AllGenes_vector)) > 4000){
|
||||
next()
|
||||
}
|
||||
#get the GO term character description where numeric Terms ID matches GO_Term's ID
|
||||
GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1])
|
||||
|
||||
#get the Z scores for all genes in the GO_ID
|
||||
Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
|
||||
|
||||
#get the Gene names and ORFs for the term
|
||||
GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ")
|
||||
GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ")
|
||||
|
||||
#dataframe to report the averages for a GO term
|
||||
|
||||
#get the GO ID
|
||||
GO_Term_Averages$GO_ID[i] <- as.character(ID)
|
||||
|
||||
#get the term name
|
||||
GO_Term_Averages$Term[i] <- GO_Description_Term
|
||||
|
||||
|
||||
#get total number of genes annotated to the Term that we have in our library
|
||||
GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
|
||||
|
||||
#get total number of genes annotated to the Term in SGD
|
||||
GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
|
||||
|
||||
#get the ontology of the term
|
||||
GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
|
||||
|
||||
#calculate the average score for every column
|
||||
for(j in 3:length(X[1,])){
|
||||
GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE)
|
||||
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
|
||||
}
|
||||
|
||||
#also calculate same values for the SD
|
||||
GO_Term_SD$GO_ID[i] <- as.character(ID)
|
||||
|
||||
#get the term name
|
||||
GO_Term_SD$Term[i] <- GO_Description_Term
|
||||
|
||||
|
||||
|
||||
#calculate column scores for SD
|
||||
for(j in 3:length(X[1,])){
|
||||
GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE)
|
||||
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
# Tag every column with the statistic it holds so the two tables can be merged
names(GO_Term_Averages) <- paste0(names(GO_Term_Averages), "_Avg")
names(GO_Term_SD) <- paste0(names(GO_Term_SD), "_SD")

# One wide table: averages and SDs side by side, columns in alphabetical order
X2 <- cbind(GO_Term_Averages, GO_Term_SD)
alphabetical <- order(names(X2))
X2 <- X2[, alphabetical]

# Keep only GO terms for which an average L Z score could be computed
has_L_avg <- !is.na(X2$Z_lm_L_Avg)
X2 <- X2[has_L_avg, ]

# Write the full table of scored GO terms
write.csv(X2, file = paste0(outputpath, "Average_GOTerms_All.csv"), row.names = FALSE)

# X2 has already had its NA rows removed above, so the NA-free table used by
# the redundancy check is the same table
X3 <- X2
|
||||
|
||||
# Identify redundant GO terms.
# For every term, collect all terms that share its average K Z score. When
# every term in that group also lists exactly the same genes, the group is
# redundant and only its first row is kept; otherwise the whole group is kept.
# Duplicated rows produced along the way are dropped by unique() at the end.
#
# Fixes relative to the previous version:
#  * the K score is matched against the K score column of the current term
#    (previously it was matched against every cell of the term's row, which
#    only worked by accident of match() coercing the row to character)
#  * seq_len() so an empty table does not iterate over c(1, 0)
#  * pieces are collected in a preallocated list and bound once, instead of
#    growing Y with rbind() on every iteration
# NOTE(review): a group that mixes duplicated and distinct gene lists is still
# kept whole, as before - confirm that this is the intended behavior.
n_terms <- nrow(X3)
kept_rows <- vector("list", n_terms)

for (i in seq_len(n_terms)) {
  # the term being examined
  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
  X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID, ]

  # every term with the same average K Z score as this term
  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg, ]

  # identical gene lists across the group -> keep a single representative
  if (nrow(X3_Temp2) > 1 && length(unique(X3_Temp2$Genes_Avg)) == 1) {
    X3_Temp2 <- X3_Temp2[1, ]
  }

  kept_rows[[i]] <- X3_Temp2
}

# With no terms at all, fall back to the (empty) input so the columns survive
if (n_terms > 0) {
  Y <- do.call(rbind, kept_rows)
} else {
  Y <- X3
}

Y1 <- unique(Y)
|
||||
|
||||
|
||||
# Non-redundant GO terms, unfiltered
write.csv(Y1, file = paste0(outputpath, "Average_GOTerms_All_NonRedundantTerms.csv"), row.names = FALSE)

# Terms whose average L Z score is at least 2 in magnitude
# (which() discards rows where the score is NA)
strong_L <- which(abs(Y1$Z_lm_L_Avg) >= 2)
Y2 <- Y1[strong_L, ]
write.csv(Y2, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"), row.names = FALSE)

# ...restricted to terms scored from more than two genes
Y3 <- Y2[Y2$NumGenes_Avg > 2, ]
write.csv(Y3, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"), row.names = FALSE)

# Terms whose average K Z score is at least 2 in magnitude
strong_K <- which(abs(Y1$Z_lm_K_Avg) >= 2)
Y4 <- Y1[strong_K, ]
write.csv(Y4, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"), row.names = FALSE)

# ...restricted to terms scored from more than two genes
Y5 <- Y4[Y4$NumGenes_Avg > 2, ]
write.csv(Y5, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"), row.names = FALSE)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,239 @@
|
||||
######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
|
||||
|
||||
#Run using Rscript from the command line
|
||||
#Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv go_terms.tab gene_association.sgd output_directory
|
||||
|
||||
#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
|
||||
#in a Q-HTCP screen
|
||||
#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
|
||||
|
||||
#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
|
||||
#requires the go_terms.tab and gene_association.sgd
|
||||
|
||||
#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
|
||||
#Output files include all scored GO terms, the non-redundant GO terms, and the non-redundant terms whose average L or K Z score is >= 2 or <= -2
|
||||
#Various other output files will give some statistics on the size of GO terms and the average scores, the SD, etc.
|
||||
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("plyr")
|
||||
|
||||
|
||||
# Command-line interface (run with Rscript):
#   arg 1 - ZScores_Interaction.csv (Z score table)
#   arg 2 - go_terms.tab
#           https://downloads.yeastgenome.org/curation/literature/go_terms.tab
#   arg 3 - gene_association.sgd
#           https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
#   arg 4 - directory to put the results into (created if it does not exist)
Args <- commandArgs(TRUE)

# Fail early with a usable message instead of a confusing file error later on
if (length(Args) < 4) {
  stop(
    "Usage: Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv ",
    "go_terms.tab gene_association.sgd output_directory",
    call. = FALSE
  )
}

input_file <- Args[1]
SGD_Terms_file <- Args[2]
SGD_features_file <- Args[3]
subDir <- Args[4]

# Create the results directory (including missing parents) when needed
if (!dir.exists(subDir)) {
  dir.create(subDir, recursive = TRUE)
}

# The output file names below are built with paste(outputpath, name, sep = ""),
# so outputpath must end in a path separator; otherwise the files are written
# next to the directory instead of inside it.
outputpath <- subDir
if (!grepl("[/\\\\]$", outputpath)) {
  outputpath <- paste0(outputpath, "/")
}
|
||||
|
||||
#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
|
||||
|
||||
# Load the Z score table. Expected layout: ORF in column 1, Gene in column 2,
# and the scores to be averaged in columns 3 onward. Missing scores must be
# real NA values (not the 0.001 placeholder needed for clustering), otherwise
# the average GO term scores are calculated incorrectly.
X <- read.csv(input_file, header = TRUE, stringsAsFactors = FALSE)

# Accept "OrfRep" as an alternative header for the ORF column
if (names(X)[1] == "OrfRep") {
  names(X)[1] <- "ORF"
}

# GO term list (go_terms.tab): no header row, quoting disabled
terms_cols <- c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
Terms <- read.delim(SGD_Terms_file, header = FALSE, quote = "", col.names = terms_cols)

# All ORFs associated with each GO term
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)

# Gene-to-GO-term association file (gene_association.sgd): the first 8 lines
# are skipped, there is no header row, and quoting is disabled
assoc_cols <- c(
  "Database", "Database_Object_ID", "Database_Object_Symbol", "NOT", "GO_ID",
  "Database_Reference", "Evidence", "With_or_From", "Aspect",
  "Database_Object_Name", "Database_Object_Synonym", "Database_Object_Type",
  "taxon", "Date", "Assigned_By", "OtherInfo", "Empty"
)
Gene_Association <- read.delim(SGD_features_file, skip = 8, header = FALSE, quote = "", col.names = assoc_cols)

# ORF name: the text before the first "|" of the synonym column
synonym_parts <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)
Gene_Association$ORF <- synonym_parts[, 1]

# Numeric GO ID for matching against Terms: the text after the first ":"
go_id_parts <- str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)
Gene_Association$GO_ID_Numeric <- as.integer(go_id_parts[, 2])
|
||||
|
||||
# Every distinct GO term that appears in the association file
GO_Terms <- unique(Gene_Association$GO_ID)

# Column names of the input file (one score column per result column)
Col_Names_X <- colnames(X)

# Template result table: same columns as the input, one all-NA row per GO
# term. seq_along() is used so that an empty GO term list gives an empty table
# rather than the phantom row that 1:length() would create.
blank_scores <- X[0, ]
blank_scores[seq_along(GO_Terms), ] <- NA

# The first two columns hold the GO ID and term name instead of ORF and Gene
colnames(blank_scores)[1:2] <- c("GO_ID", "Term")

# Table of per-term averages, with extra columns for the ontology, the number
# of genes used for the average, the number of all possible genes in the term,
# and the genes/ORFs that were used
GO_Term_Averages <- blank_scores
GO_Term_Averages$Ontology <- NA
GO_Term_Averages$NumGenes <- NA
GO_Term_Averages$AllPossibleGenes <- NA
GO_Term_Averages$Genes <- NA
GO_Term_Averages$ORFs <- NA

# Table of per-term standard deviations (score columns only)
GO_Term_SD <- blank_scores
|
||||
|
||||
#GO_Term_SD$Ontology <- NA
|
||||
#GO_Term_SD$NumGenes <- NA
|
||||
#GO_Term_SD$AllPossibleGenes <- NA
|
||||
|
||||
|
||||
# Score every GO term: for each term, take the rows of X whose ORF is
# associated with the term and store the mean and standard deviation of every
# score column (column 3 onward) in GO_Term_Averages and GO_Term_SD.
#
# Changes relative to the previous version:
#  * seq_along() so an empty GO term list does not iterate over c(1, 0)
#  * the score column range is computed once, outside the loop
#  * mean and SD are computed for all score columns in one pass per term
#    instead of one data-frame cell assignment per column
score_cols <- 3:ncol(X)

for (i in seq_along(GO_Terms)) {
  ID <- GO_Terms[i]

  # rows of the association file annotated to this GO term
  ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID, ]

  # ORF names associated with this GO term
  ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])

  # terms with more than 4000 ORFs are not scored; their rows stay NA
  if (length(unique(ID_AllGenes_vector)) > 4000) {
    next
  }

  # term description: first Terms entry whose ID matches the numeric GO ID
  GO_Description_Term <- as.character(
    Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric, ]$GO_Term[1]
  )

  # Z scores of every gene in the input that belongs to this term
  Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector, ]

  # identification of the term
  GO_Term_Averages$GO_ID[i] <- as.character(ID)
  GO_Term_Averages$Term[i] <- GO_Description_Term
  GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])

  # genes and ORFs that contributed to the scores
  GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ")
  GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF), collapse = " | ")

  # number of genes of the term present in the input ...
  GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
  # ... and number of all genes associated with the term
  GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))

  # average of every score column, ignoring NA
  term_means <- vapply(Zscores_For_ID[score_cols], mean, numeric(1), na.rm = TRUE)
  GO_Term_Averages[i, score_cols] <- as.list(unname(term_means))

  # same identification and column statistics for the standard deviation
  GO_Term_SD$GO_ID[i] <- as.character(ID)
  GO_Term_SD$Term[i] <- GO_Description_Term
  term_sds <- vapply(Zscores_For_ID[score_cols], sd, numeric(1), na.rm = TRUE)
  GO_Term_SD[i, score_cols] <- as.list(unname(term_sds))
}
|
||||
|
||||
# Tag every column with the statistic it holds so the two tables can be merged
names(GO_Term_Averages) <- paste0(names(GO_Term_Averages), "_Avg")
names(GO_Term_SD) <- paste0(names(GO_Term_SD), "_SD")

# One wide table: averages and SDs side by side, columns in alphabetical order
X2 <- cbind(GO_Term_Averages, GO_Term_SD)
alphabetical <- order(names(X2))
X2 <- X2[, alphabetical]

# Keep only GO terms for which an average L Z score could be computed
has_L_avg <- !is.na(X2$Z_lm_L_Avg)
X2 <- X2[has_L_avg, ]

# Write the full table of scored GO terms
write.csv(X2, file = paste0(outputpath, "Average_GOTerms_All.csv"), row.names = FALSE)

# X2 has already had its NA rows removed above, so the NA-free table used by
# the redundancy check is the same table
X3 <- X2
|
||||
|
||||
# Identify redundant GO terms.
# For every term, collect all terms that share its average K Z score. When
# every term in that group also lists exactly the same genes, the group is
# redundant and only its first row is kept; otherwise the whole group is kept.
# Duplicated rows produced along the way are dropped by unique() at the end.
#
# Fixes relative to the previous version:
#  * the K score is matched against the K score column of the current term
#    (previously it was matched against every cell of the term's row, which
#    only worked by accident of match() coercing the row to character)
#  * seq_len() so an empty table does not iterate over c(1, 0)
#  * pieces are collected in a preallocated list and bound once, instead of
#    growing Y with rbind() on every iteration
# NOTE(review): a group that mixes duplicated and distinct gene lists is still
# kept whole, as before - confirm that this is the intended behavior.
n_terms <- nrow(X3)
kept_rows <- vector("list", n_terms)

for (i in seq_len(n_terms)) {
  # the term being examined
  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
  X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID, ]

  # every term with the same average K Z score as this term
  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg, ]

  # identical gene lists across the group -> keep a single representative
  if (nrow(X3_Temp2) > 1 && length(unique(X3_Temp2$Genes_Avg)) == 1) {
    X3_Temp2 <- X3_Temp2[1, ]
  }

  kept_rows[[i]] <- X3_Temp2
}

# With no terms at all, fall back to the (empty) input so the columns survive
if (n_terms > 0) {
  Y <- do.call(rbind, kept_rows)
} else {
  Y <- X3
}

Y1 <- unique(Y)
|
||||
|
||||
|
||||
# Non-redundant GO terms, unfiltered
write.csv(Y1, file = paste0(outputpath, "Average_GOTerms_All_NonRedundantTerms.csv"), row.names = FALSE)

# Terms whose average L Z score is at least 2 in magnitude
# (which() discards rows where the score is NA)
strong_L <- which(abs(Y1$Z_lm_L_Avg) >= 2)
Y2 <- Y1[strong_L, ]
write.csv(Y2, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"), row.names = FALSE)

# ...restricted to terms scored from more than two genes
Y3 <- Y2[Y2$NumGenes_Avg > 2, ]
write.csv(Y3, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"), row.names = FALSE)

# Terms whose average K Z score is at least 2 in magnitude
strong_K <- which(abs(Y1$Z_lm_K_Avg) >= 2)
Y4 <- Y1[strong_K, ]
write.csv(Y4, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"), row.names = FALSE)

# ...restricted to terms scored from more than two genes
Y5 <- Y4[Y4$NumGenes_Avg > 2, ]
write.csv(Y5, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"), row.names = FALSE)
|
||||
Reference in New Issue
Block a user