Commit earlier refactoring

This commit is contained in:
2024-07-29 11:44:45 -04:00
parent 29cbce0754
commit 527068e683
294 changed files with 5524008 additions and 0 deletions

BIN
workflow/.old/ScriptTemplates/.DS_Store vendored Normal file

Binary file not shown.

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python
# This code is to concatenate the batch GO Term Finder results (.tsv) generated from batch GTF perl code(Chris Johnson, U of Tulsa) into a list table
import sys, os, string, glob
# return the file list
def list_all_files(Path):
list_all_files = []
list_all_files = glob.glob(Path +'/*.txt.tsv')
return list_all_files
# Main function
try:
data_file_Path = sys.argv[1]
output_file_Path = sys.argv[2]
except:
print 'Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name'
print 'Data file not found, error in given directory'
sys.exit(1)
# Open the output file
try:
output = open(output_file_Path, 'w')
except OSError:
print 'output file error'
# get all the GTF result files in given directory
File_list = []
File_list = list_all_files(data_file_Path)
File_list.sort()
i = 0
for file in File_list:
#parse the file names given in absolute path
file_name = file.strip().split('/')[-1]
file_name = file_name.rstrip('.txt.tsv')
# function to read tsv files from a given directory
#open the file
data = open(file,'r')
#reading the label line
labelLine = data.readline()
label = labelLine.strip().split('\t')
#write the label
#updates2010July26: update following label writing code
if i == 0:
# output.write('cluster origin')
for element in label:
output.write(element)
output.write('\t')
i = i + 1
#updates2010July26 End
#switch to the next line
output.write('\n')
#read the GO terms
GOTermLines = data.readlines()
for GOTerm in GOTermLines:
GOTerm = GOTerm.strip().strip('\t')
if GOTerm != '':
#updates2010July26: remove the code to write the first column 'REMc cluster ID'
#output.write(file_name)
#output.write('\t')
##updates2010July26 update end
output.write(GOTerm + '\n')
#output.write('\n')
output.close()
print("local Process copy of Concatenate_GTF_results.py")

View File

@@ -0,0 +1,71 @@
#!/usr/bin/env python
# This code is to concatenate the batch GO Term Finder results (.tsv) generated from batch GTF perl code(Chris Johnson, U of Tulsa) into a list table
import sys, os, string, glob
# return the file list
def list_all_files(Path):
list_all_files = []
list_all_files = glob.glob(Path +'/*.txt.tsv')
return list_all_files
# Main function
try:
data_file_Path = sys.argv[1]
output_file_Path = sys.argv[2]
except:
print 'Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name'
print 'Data file not found, error in given directory'
sys.exit(1)
# Open the output file
try:
output = open(output_file_Path, 'w')
except OSError:
print 'output file error'
# get all the GTF result files in given directory
File_list = []
File_list = list_all_files(data_file_Path)
File_list.sort()
i = 0
for file in File_list:
#parse the file names given in absolute path
file_name = file.strip().split('/')[-1]
file_name = file_name.rstrip('.txt.tsv')
# function to read tsv files from a given directory
#open the file
data = open(file,'r')
#reading the label line
labelLine = data.readline()
label = labelLine.strip().split('\t')
#write the label
#updates2010July26: update following label writing code
if i == 0:
# output.write('cluster origin')
for element in label:
output.write(element)
output.write('\t')
i = i + 1
#updates2010July26 End
#switch to the next line
output.write('\n')
#read the GO terms
GOTermLines = data.readlines()
for GOTerm in GOTermLines:
GOTerm = GOTerm.strip().strip('\t')
if GOTerm != '':
#updates2010July26: remove the code to write the first column 'REMc cluster ID'
#output.write(file_name)
#output.write('\t')
##updates2010July26 update end
output.write(GOTerm + '\n')
#output.write('\n')
output.close()

View File

@@ -0,0 +1,188 @@
#GTA (GoTermAveraging) Starting (Working Directory is /Code) All paths relative to /Code
#Currently must run for each Experiment. _Future could loop thru the number of experiments involved in study. JWR
Wstudy= getwd()
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv'
expName= "Exp1"
dir.create("../GTAresults/Exp1")
}
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv'
expName[2]= "Exp2"
dir.create("../GTAresults/Exp2")
}
if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){
inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv'
expName[3]= "Exp3"
dir.create("../GTAresults/Exp3")
}
if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){
inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv'
expName[4]= "Exp4"
dir.create("../GTAresults/Exp4")
}
outputPathGTA= "../GTAresults"
#dir.create(outPathGTA)
library("stringr")
library("org.Sc.sgd.db")
library("plyr")
#build in command args to apply this code to a given !!results sheet
SGD_Terms_file <- "../Code/go_terms.tab" #ArgsScore[2]
#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
SGD_features_file <- "../Code/gene_association.sgd" #ArgsScore[3]
#R and Rstudio have issues: The for loop(189) seemed to fail to evaluate the paste (or paste0) to build the inputFile inside the for loop and bail as the second loop began. This crude fix below, seems to have alleviated the failure to loop problem at least for now. Also for some annoying reason, the underscores between word are sometimes not shown when they exist. No ryme of reason!!!
#Begin for loop for experiments in this study-----------------ZScores_Interaction.csv
for(m in 1:length(inputFile)){
#inputFile <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1]
X <- read.csv(file = inputFile[m],stringsAsFactors=FALSE,header = TRUE)
if(colnames(X)[1] == "OrfRep"){
colnames(X)[1] <- "ORF"
}
#Terms is the GO term list
Terms <- read.delim(file = SGD_Terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#all ORFs associated with GO term
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
#Gene_Association is the gene association to GO term file
Gene_Association <- read.delim(SGD_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
#Get the ORF names associated with each gene/GO term
Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
#Get the numeric GO ID for matching
Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
#get all unique GO terms
GO_Terms <- unique(Gene_Association$GO_ID)
#create a character vector with just the ColNames of the input file to store the scores for each GO term
Col_Names_X <- colnames(X)
#create a data_frame with header from input_file
GO_Term_Averages <- X[0,]
#fill table with NAs same length as number of GO terms
GO_Term_Averages[1:length(GO_Terms),] <- NA
#change the first and second col names to GO_ID and Term
colnames(GO_Term_Averages)[1] <- "GO_ID"
colnames(GO_Term_Averages)[2] <- "Term"
#create new columns for Ontology, number genes (used to calculate the avg score), all possible genes in the GO term, and print genes/ORFs used
GO_Term_Averages$Ontology <- NA
GO_Term_Averages$NumGenes <- NA
GO_Term_Averages$AllPossibleGenes <- NA
GO_Term_Averages$Genes <- NA
GO_Term_Averages$ORFs <- NA
#create a data.frame for the standard deviation info
GO_Term_SD <- X[0,]
GO_Term_SD[1:length(GO_Terms),] <- NA
colnames(GO_Term_SD)[1] <- "GO_ID"
colnames(GO_Term_SD)[2] <- "Term"
#Loop for each GO term to get an average L and K Z score
for(i in 1:length(GO_Terms)){
#get the GO_Term
ID <- GO_Terms[i]
#Get data.frame for all genes associated to the GO Term
ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]
#get a vector of just the gene names
ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
if(length(unique(ID_AllGenes_vector)) > 4000){
next()
}
#get the GO term character description where numeric Terms ID matches GO_Term's ID
GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1])
#get the Z scores for all genes in the GO_ID
Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
#get the Gene names and ORFs for the term
GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ")
GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ")
#dataframe to report the averages for a GO term
#get the GO ID
GO_Term_Averages$GO_ID[i] <- as.character(ID)
#get the term name
GO_Term_Averages$Term[i] <- GO_Description_Term
#get total number of genes annotated to the Term that we have in our library
GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
#get total number of genes annotated to the Term in SGD
GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
#get the ontology of the term
GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
#calculate the average score for every column
for(j in 3:length(X[1,])){
GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE)
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
}
#also calculate same values for the SD
GO_Term_SD$GO_ID[i] <- as.character(ID)
#get the term name
GO_Term_SD$Term[i] <- GO_Description_Term
#calculate column scores for SD
for(j in 3:length(X[1,])){
GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE)
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
}
}
#add either _Avg or _SD depending on if the calculated score is an average or SD
colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages),"Avg", sep = "_")
colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD),"SD", sep = "_")
#combine the averages with the SDs to make one big data.frame
X2 <- cbind(GO_Term_Averages,GO_Term_SD)
#test[ , order(names(test))]
X2 <- X2[,order(names(X2))]
X2 <- X2[!is.na(X2$Z_lm_L_Avg),]
#create output file
write.csv(X2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All.csv",sep=""),row.names=FALSE)
#remove NAs
X3 <- X2[!is.na(X2$Z_lm_L_Avg),]
#identify redundant GO terms
for(i in 1:length(X3[,1])){
#loop through each GO term - get term
GO_term_ID <- as.character(X3$GO_ID_Avg[i])
#get term in the X3
X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,]
#get anywhere that has the same number K_Avg value
X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp,]
if(length(X3_Temp2[,1]) > 1){
if(length(unique(X3_Temp2$Genes_Avg)) == 1){
X3_Temp2 <- X3_Temp2[1,]
}
}
if(i == 1){
Y <- X3_Temp2
}
if(i > 1){
Y <- rbind(Y,X3_Temp2)
}
}
Y1 <- unique(Y)
write.csv(Y1,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2,]
Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
write.csv(Y2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv",sep=""),row.names = FALSE)
Y3 <- Y2[Y2$NumGenes_Avg > 2,]
write.csv(Y3,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv",sep=""),row.names = FALSE)
Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2,]
Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
write.csv(Y4,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv",sep=""),row.names = FALSE)
Y5 <- Y4[Y4$NumGenes_Avg > 2,]
write.csv(Y5,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv",sep=""),row.names = FALSE)
#End of 'for loop'
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,94 @@
#JoinCSVfilesREMc.R
#The input files should be entered in order from the greatest number of rows(Orfs) to least.
Args <- commandArgs(TRUE)
Wstudy= getwd()
dir.create("../JoinFiles") #(paste0(Wstudy,"/JoinFiles"))
outDir <- "../JoinFiles" #paste0(Wstudy,"/JoinFiles")
print(outDir)
#ArgsJoin= 'asdf'
#ArgsJoin[1]= "../JoinFiles"
#ArgsJoin[2]= "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
#ArgsJoin[3]= "../Exp2/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
#outDir <- ArgsJoin[1] #Output Directory
print(length(Args)) #display the number of arguments on terminal
#open required library for the join function (libraries must already be install on R)
library(plyr)
library(dplyr)
library(sos)
#read in the files for your experiment and
#join the two files at a time as a function of how many Args, list the larger file first ? in this example X2 has the larger number of genes.
#if X1 has a larger number of genes, switch the order of X1 and X2
if(length(Args)==2) {
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE)
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1') #remove 'Gene.1 column
headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums
headSel2 = select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums
}else if(length(Args)==3){
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file= Args[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
X <- join(X,X3,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1',-'Gene.2')
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
headSel2 = select(headSel2, -'Gene.1',-'Gene.2')
}else if(length(Args==4)){
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file= Args[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
X4 <- read.csv(file= Args[4],stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
X <- join(X,X3,by="OrfRep")
X <- join(X,X4,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1',-'Gene.2',-'Gene.3')
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
headSel2 = select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3')
}
#headSel$contains('Z_Shift') %>% replace_na(0.001)
headers<-colnames(headSel)
i=0
for(i in 1:length(headers)){
if(grepl("Shift",headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.001
}
if(grepl("Z_lm_",headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.0001
}
}
REMcRdy= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_'))
shiftOnly= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift'))
combI= headSel2 #Starting Template orf, Genename columns
headersRemc<-colnames(REMcRdy)
#Reoder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
for(i in 3:length(headersRemc)){
combI=cbind.data.frame(combI, shiftOnly[i])
combI=cbind.data.frame(combI, REMcRdy[i])
}
write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"))
write.csv(REMcRdy,file = file.path(outDir,"REMcRdy_lm_only.csv"))
write.csv(shiftOnly,file = file.path(outDir,"Shift_only.csv"))

View File

@@ -0,0 +1,287 @@
#JoinInteractExps3dev.R
#User prompt for std Value
cat("Enter a Standard Deviation value to filter data to be used by REMc ?\n")
inp <- readLines(file("stdin"), n = 1L)
cat(paste("Standard Deviation Value is", inp, "\n"))
#set std deviation multiplier default if no user entry
if(is.numeric(inp)){
std= inp
}else{std= 2}
#The input files should be entered in order from the greatest number of rows(Orfs) to least.
#Args <- commandArgs(TRUE)
#if(length(Args)==0){
# std=0
#}else{
# std=Args[1]
#}
print(paste("SD=",std))
#Wstudy= getwd()
#dir.create("../JoinFiles") #(paste0(Wstudy,"/JoinFiles"))
#outDir <- "../JoinFiles" #paste0(Wstudy,"/JoinFiles")
#dir.create("../REMc") #(paste0(Wstudy,"/JoinFiles"))
outDir <- "./" #../REMc" #paste0(Wstudy,"/JoinFiles")
print(outDir)
#Args= 'asdf'
#Args[1]= "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
#Args[2]= "../Exp2/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
#Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
#Args[4]= "../Exp4/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
#Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp3/ZScores/ZScores_Interaction.csv")
inputFile= 'asdf'
if (file.exists('../Exp1/ZScores/ZScores_Interaction.csv')){
inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv'
}
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv'
}
if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){
inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv'
}
if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){
inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv'
}
#Args= inputFile
#outDir <- ArgsJoin[1] #Output Directory
print(length(inputFile)) #display the number of arguments on terminal
#open required library for the join function (libraries must already be install on R)
library(plyr)
library(dplyr)
library(sos)
#read in the files for your experiment and
#join the two files at a time as a function of how many inputFile, list the larger file first ? in this example X2 has the larger number of genes.
#if X1 has a larger number of genes, switch the order of X1 and X2
if(length(inputFile)==2) {
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE)
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1') #remove 'Gene.1 column
headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums
headSel2 = select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums
}else if(length(inputFile)==3){
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
X <- join(X,X3,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1',-'Gene.2')
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
headSel2 = select(headSel2, -'Gene.1',-'Gene.2')
}else if(length(inputFile)==4){
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
X4 <- read.csv(file= inputFile[4],stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
X <- join(X1,X2,by="OrfRep")
X <- join(X,X3,by="OrfRep")
X <- join(X,X4,by="OrfRep")
OBH=X[,order(colnames(X))] #OrderByHeader
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
headSel= select(headSel, -'Gene.1',-'Gene.2',-'Gene.3')
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
headSel2 = select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3')
}
#headSel$contains('Z_Shift') %>% replace_na(0.001)
headers<-colnames(headSel)
i=0
for(i in 1:length(headers)){
if(grepl("Shift",headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.001
}
if(grepl("Z_lm_",headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.0001
}
}
#2SD option code to exclude Z_lm values less than 2 standard Deviations
#+++++++++++++
REMcRdy= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_'))
shiftOnly= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift'))
#Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE,sep= ",")
#R can't do math reliably within a nested loop, Therefore requires a complex text based work around+++++++++++++++
#Do it with text search and replace or modify.
#R is just a badly designed or rather badly evolved mutant language!!! Full of problems and inconsistencies!!!
#R causes huge waste of time.
#R is not worth fixing. It needs to be discontinued both in use and development before it does more harm.
#Using Text search grepl to relabel headers+++++++++++++++++++++++++++++++++++++++++
REMcRdyHdr= colnames(REMcRdy)
REMcRdyLabels= 'asdf'
shftHdr= colnames(shiftOnly)
shiftLabels='asdf'
shiftLabels[1:2]<-shftHdr[1:2]
REMcRdyLabels[1:2]<-REMcRdyHdr[1:2]
for(i in 3:(length(shftHdr))){
if(i==3){
shiftLabels[3]<-paste0(Labels[1,2],".",shftHdr[3])
REMcRdyLabels[3]<-paste0(Labels[1,2],".",REMcRdyHdr[3]) }
if(i==5){
shiftLabels[5]<-paste0(Labels[1,2],".",shftHdr[5])
REMcRdyLabels[5]<-paste0(Labels[1,2],".",REMcRdyHdr[5])
}
if(i==7){
shiftLabels[7]<-paste0(Labels[1,2],".",shftHdr[7])
REMcRdyLabels[7]<-paste0(Labels[1,2],".",REMcRdyHdr[7])
}
if(grepl(".1",shftHdr[i],fixed=true)){
shiftLabels[i]<-paste0(Labels[2,2],".",shftHdr[i])
REMcRdyLabels[i]<-paste0(Labels[2,2],".",REMcRdyHdr[i])}
if (grepl(".2",shftHdr[i],fixed=true)){
shiftLabels[i]<-paste0(Labels[3,2],".",shftHdr[i])
REMcRdyLabels[i]<-paste0(Labels[3,2],".",REMcRdyHdr[i])}
if(grepl(".3",shftHdr[i],fixed=true)){
shiftLabels[i]<-paste0(Labels[4,2],".",shftHdr[i])
REMcRdyLabels[i]<-paste0(Labels[4,2],".",REMcRdyHdr[i])}
}
for(i in 3:(length(REMcRdyLabels))){
j=as.integer(i)
REMcRdyLabels[j]<- gsub("[.]", "_", REMcRdyLabels[j])
shiftLabels[j]<- gsub("[.]", "_", shiftLabels[j])
}
colnames(shiftOnly)<- shiftLabels
colnames(REMcRdy)<- REMcRdyLabels
#+++++++++++++++++++++++
combI= headSel2 #Starting Template orf, Genename columns
#headersRemc<-colnames(REMcRdy)
#Reoder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
for(i in 3:length(colnames(REMcRdy))){
combI=cbind.data.frame(combI, shiftOnly[i])
combI=cbind.data.frame(combI, REMcRdy[i])
}
#Effort to use R vectorization select data greater than 2 Standard deviations.
#R can't handle variables in the...$(varible). The column names must be explicit. Just another R issue of thoughtlessness.
#REMcGT2<- REMcRdy[abs(REMcRdy$(REMcRdyLabels[3]))>=2 |abs(REMcRdy$REMcRdyLabels[4])>=2 |abs(REMcRdy$REMcRdyLabels[5])>=2 |abs(REMcRdy$REMcRdyLabels[6])>=2]
#another failed R work-around
#REMcGT2<- REMcRdy[abs(REMcRdy[,3])>=2 | abs(REMcRdy[,4])>=2 | abs(REMcRdy[,5])>=2 | abs(REMcRdy[,6])>=2]
#A simple task made difficult by R
#Well R doesn't handle loops well but I may have to try some extreme measures.
#a Fundamental reason RisFuckedUp StackOverflow ...$ and everything else (for, if, +,^,...) that is treated as a function can not accept argument which must be evaluated.
#Therefore just another reason R is shit! This alone should qualify the language to the garbage bin.
#Working in R is a constant battle to workaround its shortsighted deficits.
#REMcGT2<- REMcRdy[abs(REMcRdy$(REMcRdyLabels[3]))>=2 |abs(REMcRdy$REMcRdyLabels[4])>=2 |abs(REMcRdy$REMcRdyLabels[5])>=2 |abs(REMcRdy$REMcRdyLabels[6])>=2,]
#Vectorization with variable adaptation of Sean's string explicit code will not work because R cannot accept variables. Piss poor language.
#So what the hell to do???
#Basically we just need to 'OR' all the column vectors that are >=2 'True' and let that be a binary vector to apply across the data.frame REMcRdy
#R doesn't allow a vector of vectors!
#I guess the only way to do this simple task in R is to create a set of explicit variables,
#then fill each variable with the binary results, next 'OR those results and use that vector of bolean results to select values from data.frame.
#This is totally stupid crazy. But this is R.
#R violates all the fundamental rules of automation. R Requires specific explicit parameters.
#Since the largest REMc is four experiments make eight discrete vectors(K and L for each of the experiments).
#The totally stupid work-around for R
Vec1= NA
Vec2= NA
Vec3= NA
Vec4= NA
Vec5= NA
Vec6= NA
Vec7= NA
Vec8= NA
if(length(REMcRdy)== 6){
Vec1= abs(REMcRdy[,3])>=std
Vec2= abs(REMcRdy[,4])>=std
Vec3= abs(REMcRdy[,5])>=std
Vec4= abs(REMcRdy[,6])>=std
bolVec= Vec1 | Vec2
REMcRdyGT2= REMcRdy[bolVec,1:2]
REMcRdyGT2[ ,3:6]= REMcRdy[bolVec,3:6]
shiftOnlyGT2= shiftOnly[bolVec,1:2]
shiftOnlyGT2[ ,3:6]= shiftOnly[bolVec,3:6]
}
if(length(REMcRdy)== 8){
Vec1= abs(REMcRdy[,3])>=std
Vec2= abs(REMcRdy[,4])>=std
Vec3= abs(REMcRdy[,5])>=std
Vec4= abs(REMcRdy[,6])>=std
Vec5= abs(REMcRdy[,7])>=std
Vec6= abs(REMcRdy[,8])>=std
bolVec= Vec1 | Vec2 |Vec3
REMcRdyGT2= REMcRdy[bolVec,1:2]
REMcRdyGT2[ ,3:8]= REMcRdy[bolVec,3:8]
shiftOnlyGT2= shiftOnly[bolVec,1:2]
shiftOnlyGT2[ ,3:8]= shiftOnly[bolVec,3:8]
}
if(length(REMcRdy)== 10){
Vec1= abs(REMcRdy[,3])>=std
Vec2= abs(REMcRdy[,4])>=std
Vec3= abs(REMcRdy[,5])>=std
Vec4= abs(REMcRdy[,6])>=std
Vec5= abs(REMcRdy[,7])>=std
Vec6= abs(REMcRdy[,8])>=std
Vec7= abs(REMcRdy[,9])>=std
Vec8= abs(REMcRdy[,10])>=std
bolVec= Vec1 | Vec2 |Vec3 |Vec4|Vec5|Vec6|Vec7|Vec8
REMcRdyGT2= REMcRdy[bolVec,1:2]
REMcRdyGT2[ ,3:10]= REMcRdy[bolVec,3:10]
shiftOnlyGT2= shiftOnly[bolVec,1:2]
shiftOnlyGT2[ ,3:10]= shiftOnly[bolVec,3:10]
}
if(std!=0){
REMcRdy= REMcRdyGT2 #[,2:length(REMcRdyGT2)]
shiftOnly= shiftOnlyGT2 #[,2:length(shiftOnlyGT2)]
}
if(std==0){
REMcRdy= REMcRdy #[,2:length(REMcRdy)]
shiftOnly= shiftOnly #[,2:length(shiftOnly)]
}
#Yet again R creates a problem, placing hidden "" around the header names. The following
# is intended to remove those quote so that the "" donot blow up the Java REMc.
#Use ,quote=F in the write.csv statement to fix R output file.
print(paste("SD=",std))
print(getwd())
#write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE)
write.csv(REMcRdy,file = file.path(outDir,"REMcRdy_lm_only.csv"),row.names = FALSE, quote=F)
write.csv(shiftOnly,file = file.path(outDir,"Shift_only.csv"),row.names = FALSE, quote=F)
#LabelStd <- read.table(file= "./Parameters.csv",stringsAsFactors = FALSE,sep= ",")
pwd=getwd()
print(getwd)
LabelStd<- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE)
print(std)
LabelStd[,4]= as.numeric(std)
write.csv(LabelStd,file="../Code/Parameters.csv",row.names = FALSE)
write.csv(LabelStd,file="../Code/StudyInfo.csv",row.names = FALSE)
cat(paste("Standard Deviation Value was set as", std, "\n"))

View File

@@ -0,0 +1,340 @@
#4. GTA K Pairwise Compare
#Sean's Arg convention for info only
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
#Arg1 is Average_GOTerms_All_1.csv
#Arg2 is the name to give GTA results 1
#Arg3 is Average_GOTerms_All2.csv
#Arg4 is the name to give GTA results 2
#Arg5 is the directory to put the files into
#Arg 1 is the GTA results 1
#Arg 2 is the name of GTA results 1 to print in the results
#Arg 3 is GTA results 3
#Arg 4 is the name of GTA results 2 to print in the results
#Arg 4 is the name of GTA results 2 to print in the results
#arg 5 is the directory to put the results into (and create that directory if needed)
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
Args <- commandArgs(TRUE)
expNm= Args[1] #"Exp3"
expNm[2]= Args[2] #"Exp4"
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
labels <- read.delim("StudyInfo.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
Name1 <- labels[expNumber1,1] #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
Name2 <- labels[expNumber2,1] #ssArg4
Wstudy= getwd()
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
#dir.create(outPathGTAcompare[1])
dir.create(pairDirL) #(outPathGTAcompare[1])
dir.create(pairDirK) #(outPathGTAcompare[2])
#define the output path (as fourth argument from Rscript)
outputpath <- pairDirK #outPathGTAcompare[2] #Args[5]
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
#1
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("127")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#2
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-170")
#2
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#3
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#5
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#6
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#7
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#8
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
print("write csv files")
write.csv(x=X,file = paste(getwd(),"/",outputpath,"/","All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
#End of GTA Pairwise compare for K values

View File

@@ -0,0 +1,632 @@
#4.This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
#Arg1 is the first Experiment or a user ordered pair (i.e., Exp3)
#Arg2 is the second Experiment or a user ordered pair (i.e., Exp4)
#Note user can put whatever names they wish for the Canonical Exp1..4 in the ../Code/ExpLabels.txt spread sheet
#which correlates the canonical Exp Name with the user stylized label for plots and output files.
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
Args <- commandArgs(TRUE)
expNm= Args[1] #i.e., "Exp3"
expNm[2]= Args[2] #i.e., "Exp4"
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
Labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
Name1 <- Labels[expNumber1,2] #ssArg2 These are now supplied by Code/ExpLabels.csv which is user edited
Name2 <- Labels[expNumber2,2] #ssArg4
Wstudy= getwd()
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
#dir.create(outPathGTAcompare[1])
dir.create(pairDirL) #(outPathGTAcompare[1])
dir.create(pairDirK) #(outPathGTAcompare[2])
###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
#define the output path (as fourth argument from Rscript)
outputpath <- pairDirL #outPathGTAcompare[1] #Args[5]
#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
print("39")
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
print(117)
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("130")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
print("155")
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-174")
#2
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
#3
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
#5
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#6
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
#7
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#8
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
print("write csv files L")
write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
#End of L GTA Pairwise Compare
###########BEGIN PAIRWISE K-----KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
#define the output path (as fourth argument from Rscript)
outputpath <- pairDirK #outPathGTAcompare[2] #Args[5]
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
#1
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("127")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#2
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-170")
#2
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#3
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#5
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#6
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#7
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
#8
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
print("write csv files")
write.csv(x=X,file = paste(getwd(),"/",outputpath,"/","All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
#End of GTA Pairwise compare for K values

View File

@@ -0,0 +1,332 @@
#4.This R script performs GTA L Pairwise Compares for user specified pairs of Experiments
#Arg1 is the first Experiment or a user ordered pair (i.e., Exp3)
#Arg2 is the second Experiment or a user ordered pair (i.e., Exp4)
#Note user can put whatever names they wish for the Canonical Exp1..4 in the ../Code/ExpLabels.txt spread sheet
#which correlates the canonical Exp Name with the user stylized label for plots and output files.
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(extrafont)
library(grid)
library(ggthemes)
Args <- commandArgs(TRUE)
expNm= Args[1] #i.e., "Exp3"
expNm[2]= Args[2] #i.e., "Exp4"
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
labels <- read.delim("StudyInfo.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
Name1 <- labels[expNumber1,1] #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
Name2 <- labels[expNumber2,1] #ssArg4
Wstudy= getwd()
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
#dir.create(outPathGTAcompare[1])
dir.create(pairDirL) #(outPathGTAcompare[1])
dir.create(pairDirK) #(outPathGTAcompare[2])
###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
#define the output path (as fourth argument from Rscript)
outputpath <- pairDirL #outPathGTAcompare[1] #Args[5]
#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
print("39")
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
print(117)
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("130")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
print("155")
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#2-174")
#2
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
#3
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#3")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#4
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#4")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#only output GTA terms where average score is still above 2 after subtracting the SD
#Z1 will ID aggravators, Z2 alleviators
Z1 <- X
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
Z2 <- X
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
#5
X_abovethreshold <- X[!(is.na(X$Overlap)),]
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#5")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#6
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#6")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
X_abovethreshold$X1_Rank <- NA
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
X_abovethreshold$X2_Rank <- NA
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
#7
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#7")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
#8
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
theme_Publication_legend_right()
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
gg
dev.off()
pgg <- ggplotly(gg)
#pgg
print("#8")
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
print(fname)
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
print("write csv files L")
write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
#End of L GTA Pairwise Compare

View File

@@ -0,0 +1,43 @@
# Finding correlation between removing_bad_spots_at_start and Sean's results for YOR1_dF
# Intended to be run in RStudio, setting working directory to source code
library(ggplot2)
library(dplyr)
# Read in the data
sean_data <- read.csv("ZScores_Interaction_Sean.csv",stringsAsFactors = FALSE) SS_ZScores_Interaction.csv
remy_data <- read.csv("ZScores_Interaction_Remy.csv",stringsAsFactors = FALSE) JRcurated_ZScores_Interaction.csv
# We only care about L and K z scores for now, filter out other
# stuff. Then, remove NAs; then, sort so they both match up.
sean_data_filtered <- as_tibble(sean_data)
sean_data_filtered <- select(sean_data_filtered,OrfRep,Z_lm_L,Z_lm_K)
sean_data_filtered <- filter(sean_data_filtered,!is.na(Z_lm_L),!is.na(Z_lm_K))
sean_data_filtered <- arrange(sean_data_filtered,OrfRep)
remy_data_filtered <- as_tibble(remy_data)
remy_data_filtered <- select(remy_data_filtered,OrfRep,Z_lm_L,Z_lm_K)
remy_data_filtered <- filter(remy_data_filtered,!is.na(Z_lm_L),!is.na(Z_lm_K))
remy_data_filtered <- arrange(remy_data_filtered,OrfRep)
# Now everything is lined up, do linear model of trying to predict
# Sean L zscore with Remy L zscore, and make a plot
l_linear_model = lm(sean_data_filtered$Z_lm_L ~ remy_data_filtered$Z_lm_L)
summary(l_linear_model)
l_data = as.data.frame(cbind(remy_data_filtered$Z_lm_L,sean_data_filtered$Z_lm_L)) # Remy is V1 column, Sean is V2
#View(l_data)
l_gg <- ggplot(data=l_data,aes(x=V1,y=V2)) + labs(x="Remy_L",y="Sean_L")+ geom_point()
plot(l_gg)
# Repeat for K zscore
k_linear_model = lm(sean_data_filtered$Z_lm_K ~ remy_data_filtered$Z_lm_K)
summary(k_linear_model)
k_data = as.data.frame(cbind(remy_data_filtered$Z_lm_K,sean_data_filtered$Z_lm_K)) # Remy is V1 column, Sean is V2
#View(l_data)
l_gg <- ggplot(data=k_data,aes(x=V1,y=V2)) + labs(x="Remy_K",y="Sean_K")+ geom_point()
plot(l_gg)

View File

@@ -0,0 +1,765 @@
library("ontologyIndex")
library("ggplot2")
library("RColorBrewer")
library("grid")
library("ggthemes")
#library("plotly")
#library("htmlwidgets")
library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library(gplots)
Wstudy=getwd()
inputFile= 'asdf'
inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
lstExpNum= 'asdf'
a=0
#R can't index data.frame's so some stupid nasty code must be implemented
if (file.exists(inputFile[1])){
X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 1
Name1 <- labels[1,1]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[2])){
X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 2
Name2 <- labels[2,1]}
if (file.exists(inputFile[3])){
X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 3
Name3 <- labels[3,1]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[4])){
X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 4
Name4 <- labels[4,1]}
if (file.exists(inputFile[5])){
X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 5
Name5 <- labels[5,1]}
#--------------------------Standard Names-------------------------------------------------------
#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
#Name1= ExpNames[1]
#Name2= ExpNames[2]
#Name3= ExpNames[3]
#Name4= ExpNames[4]
#Name5= ExpNames[5]
print('52')
#----------------------TABLES------------------------------------------------------------
#import standard tables used in Sean's code That should be copied to each ExpStudy
ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo") # Args[5]
GOtermstab_file <- "go_terms.tab"
GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv" #All_SGD_GOTerms.csv #Args[7]
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#----------------------------------------------------------------------------
#The directory to put the results into. Sean puts this in the 'Home' directory
subDir <- "../TermSpecificHeatmaps"
if (file.exists(subDir)){
outputpath <- subDir
} else {
dir.create(file.path(subDir))
}
outputpath <- "../TermSpecificHeatmaps/" #paste0(Wstudy,"/TermSpecificHeatmaps/")
print(outputpath)
print('73')
#-----------------------------------------------------------------------------------
XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
print('80')
#rm(X,)
#Exp1--------------------------------------------------
if (file.exists(inputFile[1])){
print('84')
X1$ORF <- X1$OrfRep
X1$ORF <- gsub("_1","",x=X1$ORF)
X1$ORF <- gsub("_2","",x=X1$ORF)
X1$ORF <- gsub("_3","",x=X1$ORF)
X1$ORF <- gsub("_4","",x=X1$ORF)
X1$Score_L <- "No Effect"
try(X1[is.na(X1$Z_lm_L),]$Score_L <- "No Growth")
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X1$Score_K <- "No Effect"
try(X1[is.na(X1$Z_lm_K),]$Score_K <- "No Growth")
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X1 for K and L
X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001
X1[is.na(X1$Z_lm_K),]$Z_lm_K <- 0.001
X1$Rank_L <- rank(X1$Z_lm_L)
X1$Rank_K <- rank(X1$Z_lm_K)
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
print('110')
}
print('112')
#Exp2------------------------------------------------------------------------
if (file.exists(inputFile[2])){
X2$ORF <- X2$OrfRep
X2$ORF <- gsub("_1","",x=X2$ORF)
X2$ORF <- gsub("_2","",x=X2$ORF)
X2$ORF <- gsub("_3","",x=X2$ORF)
X2$ORF <- gsub("_4","",x=X2$ORF)
X2$Score_L <- "No Effect"
try(X2[is.na(X2$Z_lm_L),]$Score_L <- "No Growth")
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X2$Score_K <- "No Effect"
try(X2[is.na(X2$Z_lm_K),]$Score_K <- "No Growth")
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X2
X2[is.na(X2$Z_lm_L),]$Z_lm_L <- 0.001
X2[is.na(X2$Z_lm_K),]$Z_lm_K <- 0.001
X2$Rank_L <- rank(X2$Z_lm_L)
X2$Rank_K <- rank(X2$Z_lm_K)
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
X <- cbind(X1,X2)
print('file2-140')
}
print('142')
#Exp3--------------------------------------------------------------
if (file.exists(inputFile[3])){
X3$ORF <- X3$OrfRep
X3$ORF <- gsub("_1","",x=X3$ORF)
X3$ORF <- gsub("_2","",x=X3$ORF)
X3$ORF <- gsub("_3","",x=X3$ORF)
X3$ORF <- gsub("_4","",x=X3$ORF)
X3$Score_L <- "No Effect"
try(X3[is.na(X3$Z_lm_L),]$Score_L <- "No Growth")
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X3$Score_K <- "No Effect"
try(X3[is.na(X3$Z_lm_K),]$Score_K <- "No Growth")
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X3
X3[is.na(X3$Z_lm_L),]$Z_lm_L <- 0.001
X3[is.na(X3$Z_lm_K),]$Z_lm_K <- 0.001
X3$Rank_L <- rank(X3$Z_lm_L)
X3$Rank_K <- rank(X3$Z_lm_K)
X3 <- X3[order(X3$OrfRep,decreasing = FALSE),]
colnames(X3) <- paste(colnames(X3),"_X3",sep="")
X <- cbind(X,X3)
}
#Exp4
if (file.exists(inputFile[4])){
X4$ORF <- X4$OrfRep
X4$ORF <- gsub("_1","",x=X4$ORF)
X4$ORF <- gsub("_2","",x=X4$ORF)
X4$ORF <- gsub("_3","",x=X4$ORF)
X4$ORF <- gsub("_4","",x=X4$ORF)
X4$Score_L <- "No Effect"
try(X4[is.na(X4$Z_lm_L),]$Score_L <- "No Growth")
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X4$Score_K <- "No Effect"
try(X4[is.na(X4$Z_lm_K),]$Score_K <- "No Growth")
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X4
X4[is.na(X4$Z_lm_L),]$Z_lm_L <- 0.001
X4[is.na(X4$Z_lm_K),]$Z_lm_K <- 0.001
X4$Rank_L <- rank(X4$Z_lm_L)
X4$Rank_K <- rank(X4$Z_lm_K)
X4 <- X4[order(X4$OrfRep,decreasing = FALSE),]
colnames(X4) <- paste(colnames(X4),"_X4",sep="")
X <- cbind(X,X4)
}
#Exp5--------------------------------------------------------------
if (file.exists(inputFile[5])){
X5$ORF <- X5$OrfRep
X5$ORF <- gsub("_1","",x=X5$ORF)
X5$ORF <- gsub("_2","",x=X5$ORF)
X5$ORF <- gsub("_3","",x=X5$ORF)
X5$ORF <- gsub("_4","",x=X5$ORF)
X5$Score_L <- "No Effect"
try(X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth")
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
X5$Score_K <- "No Effect"
try(X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth")
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
#express the na data as 0.001 in X5
X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
X5$Rank_L <- rank(X5$Z_lm_L)
X5$Rank_K <- rank(X5$Z_lm_K)
X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
X <- cbind(X,X5)
}
print('after5-229')
X$ORF <- X$OrfRep_X1
part1=1
if (file.exists(inputFile[1])& file.exists(inputFile[2])){
print('234 ifFile2')
X$ORF <- gsub("_1","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ]
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[3])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ]
#Reorder columns
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[4])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ]
#Reorder columns
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[5])){
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
X$ORF <- gsub("_4","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
#Reorder columns
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap))
colnames(X_heatmap)[2] <- "Gene"
}
#-----------------------------------------------------------------------------------------
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
library(scales)
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "right",
legend.direction = "vertical",
legend.key.size= unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face="italic"),
plot.margin=unit(c(10,5,5,5),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
scale_fill_Publication <- function(...){
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
scale_colour_Publication <- function(...){
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
}
#www.geneontology.org/ontology/gene_ontology_edit.obo file
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
print(Ontology)
#all ORFs associated with GO term
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
print('429')
#Gene_Association is the gene association to GO term file
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
#Terms is the GO term list jwr moved up to TAABLES
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
#BIG LOOP BIG LOOP ------------------------------------------------------
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
for(s in 1:dim(XX3)[1]){
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
#GO_ID_Arg <- "GO:0006325"
GO_ID_Arg_loop <- as.character(XX3[s,1])
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if parent term has fewer than 500 children
print('for(s in 1:dim(XX3)[1]){ -454')
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
print('pdf2000 469')
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size >= 3 && Parent_Size <= 30){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
}
dev.off()
}
if(Parent_Size == 2){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
if(dim(Genes_Annotated_to_Term)[1] > 2){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
try(heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
ylab = "Gene",
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=GO_Term_Name,
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
print('740')
}
dev.off()
}
}