Finish PairwiseLK module
This commit is contained in:
@@ -1,3 +1,16 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# Makes heat maps of multiple experiments
|
||||
#
|
||||
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
||||
# I tried to leave as much logic intact as possible, just feeding in vars in a better way
|
||||
# NOTE: The script now has 7 required arguments and a variable number of input experiments
|
||||
# @arg $1 string StudyInfo.csv file
|
||||
# @arg $2 string gene_ontology_edit.obo file
|
||||
# @arg $3 string go_terms.tab file
|
||||
# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
|
||||
# @arg $5 string ZScores_interaction.csv
|
||||
# @arg $6 string base directory
|
||||
# @arg $7 string output directory
|
||||
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
@@ -10,93 +23,43 @@ library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library(gplots)
|
||||
Wstudy=getwd()
|
||||
inputFile= 'asdf'
|
||||
inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
|
||||
inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
|
||||
library("gplots")
|
||||
|
||||
# Load arguments
|
||||
args <- commandArgs(TRUE)
|
||||
study_info_file <- args[1]
|
||||
ontology_file <- args[2]
|
||||
sgd_terms_tfile <- args[3]
|
||||
all_sgd_terms_csv <- args[4]
|
||||
zscores_file <- args[5]
|
||||
base_dir <- args[6]
|
||||
output_dir <- args[7]
|
||||
study_nums <- args[8:length(args)]
|
||||
|
||||
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
|
||||
lstExpNum= 'asdf'
|
||||
a=0
|
||||
#R can't index data.frame's so some stupid nasty code must be implemented
|
||||
if (file.exists(inputFile[1])){
|
||||
X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 1
|
||||
Name1 <- labels[1,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
|
||||
if (file.exists(inputFile[2])){
|
||||
X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 2
|
||||
Name2 <- labels[2,2]}
|
||||
if (file.exists(inputFile[3])){
|
||||
X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 3
|
||||
Name3 <- labels[3,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
|
||||
if (file.exists(inputFile[4])){
|
||||
X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 4
|
||||
Name4 <- labels[4,2]}
|
||||
if (file.exists(inputFile[5])){
|
||||
X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 5
|
||||
Name5 <- labels[5,2]}
|
||||
#--------------------------Standard Names-------------------------------------------------------
|
||||
#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
|
||||
#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
|
||||
#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
|
||||
#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
|
||||
#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
|
||||
#Name1= ExpNames[1]
|
||||
#Name2= ExpNames[2]
|
||||
#Name3= ExpNames[3]
|
||||
#Name4= ExpNames[4]
|
||||
#Name5= ExpNames[5]
|
||||
print('52')
|
||||
#----------------------TABLES------------------------------------------------------------
|
||||
#import standard tables used in Sean's code That should be copied to each ExpStudy
|
||||
ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo") # Args[5]
|
||||
GOtermstab_file <- "go_terms.tab"
|
||||
GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv" #All_SGD_GOTerms.csv #Args[7]
|
||||
|
||||
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE)
|
||||
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
|
||||
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
#----------------------------------------------------------------------------
|
||||
|
||||
#The directory to put the results into. Sean puts this in the 'Home' directory
|
||||
subDir <- "../TermSpecificHeatmaps"
|
||||
|
||||
if (file.exists(subDir)){
|
||||
outputpath <- subDir
|
||||
} else {
|
||||
dir.create(file.path(subDir))
|
||||
}
|
||||
outputpath <- "../TermSpecificHeatmaps/" #paste0(Wstudy,"/TermSpecificHeatmaps/")
|
||||
print(outputpath)
|
||||
print('73')
|
||||
#-----------------------------------------------------------------------------------
|
||||
XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE)
|
||||
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
|
||||
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
|
||||
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
|
||||
print('80')
|
||||
#rm(X,)
|
||||
#Exp1--------------------------------------------------
|
||||
if (file.exists(inputFile[1])){
|
||||
print('84')
|
||||
|
||||
# Load input files
|
||||
for (study_num in study_nums) {
|
||||
input_file <- file.path(base_dir,paste('Exp',study_num),zscores_file)
|
||||
if (file.exists(input_file)) {
|
||||
assign(paste(X, study_num), read.csv(file=input_file,stringsAsFactors=FALSE,header=TRUE))
|
||||
assign(paste(Name, study_num), labels[study_num,2])
|
||||
}
|
||||
}
|
||||
|
||||
for (study_num in study_nums) {
|
||||
eval(paste("function",study_num))
|
||||
}
|
||||
|
||||
if (length(study_nums) > 0) {
|
||||
X1$ORF <- X1$OrfRep
|
||||
X1$ORF <- gsub("_1","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_2","",x=X1$ORF)
|
||||
@@ -121,11 +84,9 @@ if (file.exists(inputFile[1])){
|
||||
|
||||
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
|
||||
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
|
||||
print('110')
|
||||
}
|
||||
print('112')
|
||||
#Exp2------------------------------------------------------------------------
|
||||
if (file.exists(inputFile[2])){
|
||||
|
||||
if (length(study_nums) > 1) {
|
||||
X2$ORF <- X2$OrfRep
|
||||
X2$ORF <- gsub("_1","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_2","",x=X2$ORF)
|
||||
@@ -151,11 +112,9 @@ if (file.exists(inputFile[2])){
|
||||
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
|
||||
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
|
||||
X <- cbind(X1,X2)
|
||||
print('file2-140')
|
||||
}
|
||||
print('142')
|
||||
#Exp3--------------------------------------------------------------
|
||||
if (file.exists(inputFile[3])){
|
||||
|
||||
if (length(study_nums) > 2) {
|
||||
X3$ORF <- X3$OrfRep
|
||||
X3$ORF <- gsub("_1","",x=X3$ORF)
|
||||
X3$ORF <- gsub("_2","",x=X3$ORF)
|
||||
@@ -183,8 +142,7 @@ if (file.exists(inputFile[3])){
|
||||
X <- cbind(X,X3)
|
||||
}
|
||||
|
||||
#Exp4
|
||||
if (file.exists(inputFile[4])){
|
||||
if (length(study_nums) > 3) {
|
||||
X4$ORF <- X4$OrfRep
|
||||
X4$ORF <- gsub("_1","",x=X4$ORF)
|
||||
X4$ORF <- gsub("_2","",x=X4$ORF)
|
||||
@@ -212,8 +170,7 @@ if (file.exists(inputFile[4])){
|
||||
X <- cbind(X,X4)
|
||||
}
|
||||
|
||||
#Exp5--------------------------------------------------------------
|
||||
if (file.exists(inputFile[5])){
|
||||
if (length(study_nums) > 4) {
|
||||
X5$ORF <- X5$OrfRep
|
||||
X5$ORF <- gsub("_1","",x=X5$ORF)
|
||||
X5$ORF <- gsub("_2","",x=X5$ORF)
|
||||
@@ -240,12 +197,11 @@ if (file.exists(inputFile[5])){
|
||||
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
|
||||
X <- cbind(X,X5)
|
||||
}
|
||||
print('after5-229')
|
||||
X$ORF <- X$OrfRep_X1
|
||||
part1=1
|
||||
|
||||
if (file.exists(inputFile[1])& file.exists(inputFile[2])){
|
||||
print('234 ifFile2')
|
||||
X$ORF <- X$OrfRep_X1
|
||||
|
||||
if (length(study_nums) > 1) {
|
||||
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
|
||||
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
||||
@@ -263,7 +219,7 @@ if (file.exists(inputFile[1])& file.exists(inputFile[2])){
|
||||
}
|
||||
|
||||
|
||||
if (file.exists(inputFile[3])){
|
||||
if (length(study_nums) > 2) {
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
|
||||
@@ -286,7 +242,7 @@ if (file.exists(inputFile[3])){
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
}
|
||||
|
||||
if (file.exists(inputFile[4])){
|
||||
if (length(study_nums) > 3) {
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
X$ORF <- gsub("_3","",x=X$ORF)
|
||||
@@ -315,7 +271,7 @@ if (file.exists(inputFile[4])){
|
||||
}
|
||||
|
||||
|
||||
if (file.exists(inputFile[5])){
|
||||
if (length(study_nums) > 4) {
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
X$ORF <- gsub("_3","",x=X$ORF)
|
||||
@@ -350,8 +306,6 @@ if (file.exists(inputFile[5])){
|
||||
}
|
||||
|
||||
|
||||
#-----------------------------------------------------------------------------------------
|
||||
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
@@ -434,27 +388,15 @@ scale_colour_Publication <- function(...){
|
||||
}
|
||||
|
||||
|
||||
|
||||
#www.geneontology.org/ontology/gene_ontology_edit.obo file
|
||||
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
print(Ontology)
|
||||
#all ORFs associated with GO term
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
print('429')
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
#Terms is the GO term list jwr moved up to TAABLES
|
||||
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------------------------------------
|
||||
#BIG LOOP BIG LOOP ------------------------------------------------------
|
||||
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
|
||||
for(s in 1:dim(XX3)[1]){
|
||||
@@ -465,24 +407,19 @@ for(s in 1:dim(XX3)[1]){
|
||||
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
|
||||
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
|
||||
#only make plots if parent term has fewer than 500 children
|
||||
|
||||
print('for(s in 1:dim(XX3)[1]){ -454')
|
||||
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
|
||||
if(length(GOTerm_parent) > 100){
|
||||
#print(length(GOTerm_parent))
|
||||
next()
|
||||
}
|
||||
|
||||
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
if(Parent_Size < 2){
|
||||
next()
|
||||
}
|
||||
if(Parent_Size > 2000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
|
||||
print('pdf2000 469')
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -509,7 +446,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
}
|
||||
|
||||
if(Parent_Size >= 1000 && Parent_Size <= 2000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -535,7 +472,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 500 && Parent_Size <= 1000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -561,7 +498,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 200 && Parent_Size <= 500){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -587,7 +524,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 100 && Parent_Size <= 200){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -613,7 +550,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 60 && Parent_Size <= 100){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -639,7 +576,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 30 && Parent_Size <= 60){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -679,7 +616,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 3 && Parent_Size <= 30){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -718,7 +655,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size == 2){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
@@ -753,13 +690,7 @@ for(s in 1:dim(XX3)[1]){
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
print('740')
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user