Commit earlier refactoring
This commit is contained in:
BIN
workflow/.old/ScriptTemplates/.DS_Store
vendored
Normal file
BIN
workflow/.old/ScriptTemplates/.DS_Store
vendored
Normal file
Binary file not shown.
0
workflow/.old/ScriptTemplates/.Rhistory
Normal file
0
workflow/.old/ScriptTemplates/.Rhistory
Normal file
2721
workflow/.old/ScriptTemplates/BU_Legacy/InteractTemplate.R
Normal file
2721
workflow/.old/ScriptTemplates/BU_Legacy/InteractTemplate.R
Normal file
File diff suppressed because it is too large
Load Diff
72
workflow/.old/ScriptTemplates/Concatenate_GTF_results.py
Normal file
72
workflow/.old/ScriptTemplates/Concatenate_GTF_results.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This code is to concatenate the batch GO Term Finder results (.tsv) generated from batch GTF perl code(Chris Johnson, U of Tulsa) into a list table
|
||||
|
||||
import sys, os, string, glob
|
||||
|
||||
|
||||
# return the file list
|
||||
def list_all_files(Path):
|
||||
list_all_files = []
|
||||
list_all_files = glob.glob(Path +'/*.txt.tsv')
|
||||
return list_all_files
|
||||
|
||||
|
||||
# Main function
|
||||
|
||||
try:
|
||||
data_file_Path = sys.argv[1]
|
||||
output_file_Path = sys.argv[2]
|
||||
except:
|
||||
print 'Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name'
|
||||
print 'Data file not found, error in given directory'
|
||||
sys.exit(1)
|
||||
|
||||
# Open the output file
|
||||
|
||||
try:
|
||||
output = open(output_file_Path, 'w')
|
||||
except OSError:
|
||||
print 'output file error'
|
||||
|
||||
# get all the GTF result files in given directory
|
||||
File_list = []
|
||||
File_list = list_all_files(data_file_Path)
|
||||
File_list.sort()
|
||||
|
||||
i = 0
|
||||
for file in File_list:
|
||||
#parse the file names given in absolute path
|
||||
file_name = file.strip().split('/')[-1]
|
||||
file_name = file_name.rstrip('.txt.tsv')
|
||||
# function to read tsv files from a given directory
|
||||
#open the file
|
||||
data = open(file,'r')
|
||||
#reading the label line
|
||||
labelLine = data.readline()
|
||||
label = labelLine.strip().split('\t')
|
||||
#write the label
|
||||
#updates2010July26: update following label writing code
|
||||
if i == 0:
|
||||
# output.write('cluster origin')
|
||||
for element in label:
|
||||
output.write(element)
|
||||
output.write('\t')
|
||||
i = i + 1
|
||||
#updates2010July26 End
|
||||
#switch to the next line
|
||||
output.write('\n')
|
||||
|
||||
#read the GO terms
|
||||
GOTermLines = data.readlines()
|
||||
for GOTerm in GOTermLines:
|
||||
GOTerm = GOTerm.strip().strip('\t')
|
||||
if GOTerm != '':
|
||||
#updates2010July26: remove the code to write the first column 'REMc cluster ID'
|
||||
#output.write(file_name)
|
||||
#output.write('\t')
|
||||
##updates2010July26 update end
|
||||
output.write(GOTerm + '\n')
|
||||
#output.write('\n')
|
||||
output.close()
|
||||
print("local Process copy of Concatenate_GTF_results.py")
|
||||
@@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This code is to concatenate the batch GO Term Finder results (.tsv) generated from batch GTF perl code(Chris Johnson, U of Tulsa) into a list table
|
||||
|
||||
import sys, os, string, glob
|
||||
|
||||
|
||||
# return the file list
|
||||
def list_all_files(Path):
|
||||
list_all_files = []
|
||||
list_all_files = glob.glob(Path +'/*.txt.tsv')
|
||||
return list_all_files
|
||||
|
||||
|
||||
# Main function
|
||||
|
||||
try:
|
||||
data_file_Path = sys.argv[1]
|
||||
output_file_Path = sys.argv[2]
|
||||
except:
|
||||
print 'Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name'
|
||||
print 'Data file not found, error in given directory'
|
||||
sys.exit(1)
|
||||
|
||||
# Open the output file
|
||||
|
||||
try:
|
||||
output = open(output_file_Path, 'w')
|
||||
except OSError:
|
||||
print 'output file error'
|
||||
|
||||
# get all the GTF result files in given directory
|
||||
File_list = []
|
||||
File_list = list_all_files(data_file_Path)
|
||||
File_list.sort()
|
||||
|
||||
i = 0
|
||||
for file in File_list:
|
||||
#parse the file names given in absolute path
|
||||
file_name = file.strip().split('/')[-1]
|
||||
file_name = file_name.rstrip('.txt.tsv')
|
||||
# function to read tsv files from a given directory
|
||||
#open the file
|
||||
data = open(file,'r')
|
||||
#reading the label line
|
||||
labelLine = data.readline()
|
||||
label = labelLine.strip().split('\t')
|
||||
#write the label
|
||||
#updates2010July26: update following label writing code
|
||||
if i == 0:
|
||||
# output.write('cluster origin')
|
||||
for element in label:
|
||||
output.write(element)
|
||||
output.write('\t')
|
||||
i = i + 1
|
||||
#updates2010July26 End
|
||||
#switch to the next line
|
||||
output.write('\n')
|
||||
|
||||
#read the GO terms
|
||||
GOTermLines = data.readlines()
|
||||
for GOTerm in GOTermLines:
|
||||
GOTerm = GOTerm.strip().strip('\t')
|
||||
if GOTerm != '':
|
||||
#updates2010July26: remove the code to write the first column 'REMc cluster ID'
|
||||
#output.write(file_name)
|
||||
#output.write('\t')
|
||||
##updates2010July26 update end
|
||||
output.write(GOTerm + '\n')
|
||||
#output.write('\n')
|
||||
output.close()
|
||||
188
workflow/.old/ScriptTemplates/GTAtemplate.R
Normal file
188
workflow/.old/ScriptTemplates/GTAtemplate.R
Normal file
@@ -0,0 +1,188 @@
|
||||
#GTA (GoTermAveraging) Starting (Working Directory is /Code) All paths relative to /Code
|
||||
|
||||
#Currently must run for each Experiment. _Future could loop thru the number of experiments involved in study. JWR
|
||||
Wstudy= getwd()
|
||||
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv'
|
||||
expName= "Exp1"
|
||||
dir.create("../GTAresults/Exp1")
|
||||
}
|
||||
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv'
|
||||
expName[2]= "Exp2"
|
||||
dir.create("../GTAresults/Exp2")
|
||||
}
|
||||
if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv'
|
||||
expName[3]= "Exp3"
|
||||
dir.create("../GTAresults/Exp3")
|
||||
}
|
||||
if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv'
|
||||
expName[4]= "Exp4"
|
||||
dir.create("../GTAresults/Exp4")
|
||||
}
|
||||
|
||||
outputPathGTA= "../GTAresults"
|
||||
#dir.create(outPathGTA)
|
||||
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("plyr")
|
||||
#build in command args to apply this code to a given !!results sheet
|
||||
|
||||
SGD_Terms_file <- "../Code/go_terms.tab" #ArgsScore[2]
|
||||
#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
|
||||
SGD_features_file <- "../Code/gene_association.sgd" #ArgsScore[3]
|
||||
|
||||
#R and Rstudio have issues: The for loop(189) seemed to fail to evaluate the paste (or paste0) to build the inputFile inside the for loop and bail as the second loop began. This crude fix below, seems to have alleviated the failure to loop problem at least for now. Also for some annoying reason, the underscores between word are sometimes not shown when they exist. No ryme of reason!!!
|
||||
|
||||
|
||||
#Begin for loop for experiments in this study-----------------ZScores_Interaction.csv
|
||||
for(m in 1:length(inputFile)){
|
||||
|
||||
#inputFile <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1]
|
||||
X <- read.csv(file = inputFile[m],stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
if(colnames(X)[1] == "OrfRep"){
|
||||
colnames(X)[1] <- "ORF"
|
||||
}
|
||||
|
||||
#Terms is the GO term list
|
||||
Terms <- read.delim(file = SGD_Terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
#all ORFs associated with GO term
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
#Gene_Association is the gene association to GO term file
|
||||
Gene_Association <- read.delim(SGD_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Get the ORF names associated with each gene/GO term
|
||||
Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Get the numeric GO ID for matching
|
||||
Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
#get all unique GO terms
|
||||
GO_Terms <- unique(Gene_Association$GO_ID)
|
||||
#create a character vector with just the ColNames of the input file to store the scores for each GO term
|
||||
Col_Names_X <- colnames(X)
|
||||
#create a data_frame with header from input_file
|
||||
GO_Term_Averages <- X[0,]
|
||||
#fill table with NAs same length as number of GO terms
|
||||
GO_Term_Averages[1:length(GO_Terms),] <- NA
|
||||
#change the first and second col names to GO_ID and Term
|
||||
colnames(GO_Term_Averages)[1] <- "GO_ID"
|
||||
colnames(GO_Term_Averages)[2] <- "Term"
|
||||
|
||||
#create new columns for Ontology, number genes (used to calculate the avg score), all possible genes in the GO term, and print genes/ORFs used
|
||||
GO_Term_Averages$Ontology <- NA
|
||||
GO_Term_Averages$NumGenes <- NA
|
||||
GO_Term_Averages$AllPossibleGenes <- NA
|
||||
GO_Term_Averages$Genes <- NA
|
||||
GO_Term_Averages$ORFs <- NA
|
||||
|
||||
#create a data.frame for the standard deviation info
|
||||
GO_Term_SD <- X[0,]
|
||||
GO_Term_SD[1:length(GO_Terms),] <- NA
|
||||
colnames(GO_Term_SD)[1] <- "GO_ID"
|
||||
colnames(GO_Term_SD)[2] <- "Term"
|
||||
|
||||
#Loop for each GO term to get an average L and K Z score
|
||||
for(i in 1:length(GO_Terms)){
|
||||
#get the GO_Term
|
||||
ID <- GO_Terms[i]
|
||||
#Get data.frame for all genes associated to the GO Term
|
||||
ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]
|
||||
#get a vector of just the gene names
|
||||
ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
|
||||
if(length(unique(ID_AllGenes_vector)) > 4000){
|
||||
next()
|
||||
}
|
||||
#get the GO term character description where numeric Terms ID matches GO_Term's ID
|
||||
GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1])
|
||||
#get the Z scores for all genes in the GO_ID
|
||||
Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
|
||||
#get the Gene names and ORFs for the term
|
||||
GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ")
|
||||
GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ")
|
||||
|
||||
#dataframe to report the averages for a GO term
|
||||
#get the GO ID
|
||||
GO_Term_Averages$GO_ID[i] <- as.character(ID)
|
||||
#get the term name
|
||||
GO_Term_Averages$Term[i] <- GO_Description_Term
|
||||
#get total number of genes annotated to the Term that we have in our library
|
||||
GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
|
||||
#get total number of genes annotated to the Term in SGD
|
||||
GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
|
||||
#get the ontology of the term
|
||||
GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
|
||||
|
||||
#calculate the average score for every column
|
||||
for(j in 3:length(X[1,])){
|
||||
GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE)
|
||||
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
|
||||
}
|
||||
|
||||
#also calculate same values for the SD
|
||||
GO_Term_SD$GO_ID[i] <- as.character(ID)
|
||||
#get the term name
|
||||
GO_Term_SD$Term[i] <- GO_Description_Term
|
||||
|
||||
#calculate column scores for SD
|
||||
for(j in 3:length(X[1,])){
|
||||
GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE)
|
||||
#GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
|
||||
}
|
||||
}
|
||||
#add either _Avg or _SD depending on if the calculated score is an average or SD
|
||||
colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages),"Avg", sep = "_")
|
||||
colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD),"SD", sep = "_")
|
||||
#combine the averages with the SDs to make one big data.frame
|
||||
X2 <- cbind(GO_Term_Averages,GO_Term_SD)
|
||||
#test[ , order(names(test))]
|
||||
X2 <- X2[,order(names(X2))]
|
||||
X2 <- X2[!is.na(X2$Z_lm_L_Avg),]
|
||||
#create output file
|
||||
write.csv(X2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All.csv",sep=""),row.names=FALSE)
|
||||
#remove NAs
|
||||
X3 <- X2[!is.na(X2$Z_lm_L_Avg),]
|
||||
#identify redundant GO terms
|
||||
|
||||
for(i in 1:length(X3[,1])){
|
||||
#loop through each GO term - get term
|
||||
GO_term_ID <- as.character(X3$GO_ID_Avg[i])
|
||||
#get term in the X3
|
||||
X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,]
|
||||
#get anywhere that has the same number K_Avg value
|
||||
X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp,]
|
||||
if(length(X3_Temp2[,1]) > 1){
|
||||
if(length(unique(X3_Temp2$Genes_Avg)) == 1){
|
||||
X3_Temp2 <- X3_Temp2[1,]
|
||||
}
|
||||
}
|
||||
|
||||
if(i == 1){
|
||||
Y <- X3_Temp2
|
||||
}
|
||||
if(i > 1){
|
||||
Y <- rbind(Y,X3_Temp2)
|
||||
}
|
||||
}
|
||||
Y1 <- unique(Y)
|
||||
|
||||
write.csv(Y1,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2,]
|
||||
Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
|
||||
write.csv(Y2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y3 <- Y2[Y2$NumGenes_Avg > 2,]
|
||||
write.csv(Y3,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2,]
|
||||
Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
|
||||
write.csv(Y4,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y5 <- Y4[Y4$NumGenes_Avg > 2,]
|
||||
write.csv(Y5,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
|
||||
#End of 'for loop'
|
||||
}
|
||||
|
||||
2730
workflow/.old/ScriptTemplates/InteractionTemplate230119.R
Normal file
2730
workflow/.old/ScriptTemplates/InteractionTemplate230119.R
Normal file
File diff suppressed because it is too large
Load Diff
94
workflow/.old/ScriptTemplates/JoinInteractExps.R
Normal file
94
workflow/.old/ScriptTemplates/JoinInteractExps.R
Normal file
@@ -0,0 +1,94 @@
|
||||
|
||||
|
||||
#JoinCSVfilesREMc.R
|
||||
#The input files should be entered in order from the greatest number of rows(Orfs) to least.
|
||||
Args <- commandArgs(TRUE)
|
||||
Wstudy= getwd()
|
||||
dir.create("../JoinFiles") #(paste0(Wstudy,"/JoinFiles"))
|
||||
outDir <- "../JoinFiles" #paste0(Wstudy,"/JoinFiles")
|
||||
print(outDir)
|
||||
#ArgsJoin= 'asdf'
|
||||
#ArgsJoin[1]= "../JoinFiles"
|
||||
#ArgsJoin[2]= "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
|
||||
#ArgsJoin[3]= "../Exp2/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
|
||||
|
||||
#outDir <- ArgsJoin[1] #Output Directory
|
||||
|
||||
print(length(Args)) #display the number of arguments on terminal
|
||||
|
||||
#open required library for the join function (libraries must already be install on R)
|
||||
library(plyr)
|
||||
library(dplyr)
|
||||
library(sos)
|
||||
|
||||
#read in the files for your experiment and
|
||||
#join the two files at a time as a function of how many Args, list the larger file first ? in this example X2 has the larger number of genes.
|
||||
#if X1 has a larger number of genes, switch the order of X1 and X2
|
||||
if(length(Args)==2) {
|
||||
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1') #remove 'Gene.1 column
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums
|
||||
headSel2 = select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
||||
|
||||
}else if(length(Args)==3){
|
||||
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file= Args[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
X <- join(X,X3,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1',-'Gene.2')
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
|
||||
headSel2 = select(headSel2, -'Gene.1',-'Gene.2')
|
||||
|
||||
}else if(length(Args==4)){
|
||||
X1 <- read.csv(file= Args[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= Args[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file= Args[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
X4 <- read.csv(file= Args[4],stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
X <- join(X,X3,by="OrfRep")
|
||||
X <- join(X,X4,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1',-'Gene.2',-'Gene.3')
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
|
||||
headSel2 = select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3')
|
||||
|
||||
}
|
||||
|
||||
#headSel$contains('Z_Shift') %>% replace_na(0.001)
|
||||
headers<-colnames(headSel)
|
||||
i=0
|
||||
|
||||
for(i in 1:length(headers)){
|
||||
|
||||
if(grepl("Shift",headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.001
|
||||
}
|
||||
if(grepl("Z_lm_",headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.0001
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
REMcRdy= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_'))
|
||||
shiftOnly= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift'))
|
||||
|
||||
combI= headSel2 #Starting Template orf, Genename columns
|
||||
headersRemc<-colnames(REMcRdy)
|
||||
#Reoder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
||||
for(i in 3:length(headersRemc)){
|
||||
combI=cbind.data.frame(combI, shiftOnly[i])
|
||||
combI=cbind.data.frame(combI, REMcRdy[i])
|
||||
}
|
||||
|
||||
|
||||
write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"))
|
||||
write.csv(REMcRdy,file = file.path(outDir,"REMcRdy_lm_only.csv"))
|
||||
write.csv(shiftOnly,file = file.path(outDir,"Shift_only.csv"))
|
||||
287
workflow/.old/ScriptTemplates/JoinInteractExps3dev.R
Normal file
287
workflow/.old/ScriptTemplates/JoinInteractExps3dev.R
Normal file
@@ -0,0 +1,287 @@
|
||||
#JoinInteractExps3dev.R
|
||||
|
||||
|
||||
#User prompt for std Value
|
||||
cat("Enter a Standard Deviation value to filter data to be used by REMc ?\n")
|
||||
inp <- readLines(file("stdin"), n = 1L)
|
||||
cat(paste("Standard Deviation Value is", inp, "\n"))
|
||||
|
||||
#set std deviation multiplier default if no user entry
|
||||
if(is.numeric(inp)){
|
||||
std= inp
|
||||
}else{std= 2}
|
||||
|
||||
|
||||
#The input files should be entered in order from the greatest number of rows(Orfs) to least.
|
||||
#Args <- commandArgs(TRUE)
|
||||
#if(length(Args)==0){
|
||||
# std=0
|
||||
#}else{
|
||||
# std=Args[1]
|
||||
#}
|
||||
|
||||
print(paste("SD=",std))
|
||||
#Wstudy= getwd()
|
||||
#dir.create("../JoinFiles") #(paste0(Wstudy,"/JoinFiles"))
|
||||
#outDir <- "../JoinFiles" #paste0(Wstudy,"/JoinFiles")
|
||||
#dir.create("../REMc") #(paste0(Wstudy,"/JoinFiles"))
|
||||
outDir <- "./" #../REMc" #paste0(Wstudy,"/JoinFiles")
|
||||
|
||||
print(outDir)
|
||||
#Args= 'asdf'
|
||||
#Args[1]= "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
|
||||
#Args[2]= "../Exp2/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
|
||||
#Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
|
||||
#Args[4]= "../Exp4/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp2/ZScores/ZScores_Interaction.csv")
|
||||
#Args[3]= "../Exp3/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp3/ZScores/ZScores_Interaction.csv")
|
||||
|
||||
inputFile= 'asdf'
|
||||
if (file.exists('../Exp1/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile <- '../Exp1/ZScores/ZScores_Interaction.csv'
|
||||
}
|
||||
if (file.exists('../Exp2/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[2] <- '../Exp2/ZScores/ZScores_Interaction.csv'
|
||||
}
|
||||
if (file.exists('../Exp3/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[3] <- '../Exp3/ZScores/ZScores_Interaction.csv'
|
||||
}
|
||||
if (file.exists('../Exp4/ZScores/ZScores_Interaction.csv')){
|
||||
inputFile[4] <- '../Exp4/ZScores/ZScores_Interaction.csv'
|
||||
}
|
||||
|
||||
#Args= inputFile
|
||||
|
||||
|
||||
|
||||
#outDir <- ArgsJoin[1] #Output Directory
|
||||
|
||||
print(length(inputFile)) #display the number of arguments on terminal
|
||||
|
||||
#open required library for the join function (libraries must already be install on R)
|
||||
library(plyr)
|
||||
library(dplyr)
|
||||
library(sos)
|
||||
|
||||
#read in the files for your experiment and
|
||||
#join the two files at a time as a function of how many inputFile, list the larger file first ? in this example X2 has the larger number of genes.
|
||||
#if X1 has a larger number of genes, switch the order of X1 and X2
|
||||
if(length(inputFile)==2) {
|
||||
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1') #remove 'Gene.1 column
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene')) #Frame for interleaving Z_lm with Shift colums
|
||||
headSel2 = select(headSel2, -'Gene.1') #remove 'Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
||||
|
||||
}else if(length(inputFile)==3){
|
||||
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
X <- join(X,X3,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1',-'Gene.2')
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
|
||||
headSel2 = select(headSel2, -'Gene.1',-'Gene.2')
|
||||
|
||||
}else if(length(inputFile)==4){
|
||||
X1 <- read.csv(file= inputFile[1],stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file= inputFile[2],stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file= inputFile[3],stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
X4 <- read.csv(file= inputFile[4],stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
|
||||
X <- join(X1,X2,by="OrfRep")
|
||||
X <- join(X,X3,by="OrfRep")
|
||||
X <- join(X,X4,by="OrfRep")
|
||||
OBH=X[,order(colnames(X))] #OrderByHeader
|
||||
headSel= select(OBH, contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'),contains('Z_lm_L'), contains('Z_Shift_L'))
|
||||
headSel= select(headSel, -'Gene.1',-'Gene.2',-'Gene.3')
|
||||
headSel2 = select(OBH, contains('OrfRep'), matches('Gene'))
|
||||
headSel2 = select(headSel2, -'Gene.1',-'Gene.2',-'Gene.3')
|
||||
|
||||
}
|
||||
|
||||
#headSel$contains('Z_Shift') %>% replace_na(0.001)
|
||||
headers<-colnames(headSel)
|
||||
i=0
|
||||
for(i in 1:length(headers)){
|
||||
|
||||
if(grepl("Shift",headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.001
|
||||
}
|
||||
if(grepl("Z_lm_",headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] = 0.0001
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#2SD option code to exclude Z_lm values less than 2 standard Deviations
|
||||
|
||||
|
||||
#+++++++++++++
|
||||
|
||||
REMcRdy= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_lm_'))
|
||||
shiftOnly= select(headSel, contains('OrfRep'), matches('Gene'), contains('Z_Shift'))
|
||||
|
||||
#Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
|
||||
Labels <- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE,sep= ",")
|
||||
|
||||
|
||||
#R can't do math reliably within a nested loop, Therefore requires a complex text based work around+++++++++++++++
|
||||
#Do it with text search and replace or modify.
|
||||
#R is just a badly designed or rather badly evolved mutant language!!! Full of problems and inconsistencies!!!
|
||||
#R causes huge waste of time.
|
||||
#R is not worth fixing. It needs to be discontinued both in use and development before it does more harm.
|
||||
#Using Text search grepl to relabel headers+++++++++++++++++++++++++++++++++++++++++
|
||||
REMcRdyHdr= colnames(REMcRdy)
|
||||
REMcRdyLabels= 'asdf'
|
||||
shftHdr= colnames(shiftOnly)
|
||||
shiftLabels='asdf'
|
||||
shiftLabels[1:2]<-shftHdr[1:2]
|
||||
REMcRdyLabels[1:2]<-REMcRdyHdr[1:2]
|
||||
|
||||
for(i in 3:(length(shftHdr))){
|
||||
if(i==3){
|
||||
shiftLabels[3]<-paste0(Labels[1,2],".",shftHdr[3])
|
||||
REMcRdyLabels[3]<-paste0(Labels[1,2],".",REMcRdyHdr[3]) }
|
||||
if(i==5){
|
||||
shiftLabels[5]<-paste0(Labels[1,2],".",shftHdr[5])
|
||||
REMcRdyLabels[5]<-paste0(Labels[1,2],".",REMcRdyHdr[5])
|
||||
}
|
||||
if(i==7){
|
||||
shiftLabels[7]<-paste0(Labels[1,2],".",shftHdr[7])
|
||||
REMcRdyLabels[7]<-paste0(Labels[1,2],".",REMcRdyHdr[7])
|
||||
}
|
||||
if(grepl(".1",shftHdr[i],fixed=true)){
|
||||
shiftLabels[i]<-paste0(Labels[2,2],".",shftHdr[i])
|
||||
REMcRdyLabels[i]<-paste0(Labels[2,2],".",REMcRdyHdr[i])}
|
||||
if (grepl(".2",shftHdr[i],fixed=true)){
|
||||
shiftLabels[i]<-paste0(Labels[3,2],".",shftHdr[i])
|
||||
REMcRdyLabels[i]<-paste0(Labels[3,2],".",REMcRdyHdr[i])}
|
||||
if(grepl(".3",shftHdr[i],fixed=true)){
|
||||
shiftLabels[i]<-paste0(Labels[4,2],".",shftHdr[i])
|
||||
REMcRdyLabels[i]<-paste0(Labels[4,2],".",REMcRdyHdr[i])}
|
||||
}
|
||||
|
||||
for(i in 3:(length(REMcRdyLabels))){
|
||||
j=as.integer(i)
|
||||
REMcRdyLabels[j]<- gsub("[.]", "_", REMcRdyLabels[j])
|
||||
shiftLabels[j]<- gsub("[.]", "_", shiftLabels[j])
|
||||
}
|
||||
|
||||
colnames(shiftOnly)<- shiftLabels
|
||||
colnames(REMcRdy)<- REMcRdyLabels
|
||||
|
||||
#+++++++++++++++++++++++
|
||||
combI= headSel2 #Starting Template orf, Genename columns
|
||||
#headersRemc<-colnames(REMcRdy)
|
||||
#Reoder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
||||
for(i in 3:length(colnames(REMcRdy))){
|
||||
combI=cbind.data.frame(combI, shiftOnly[i])
|
||||
combI=cbind.data.frame(combI, REMcRdy[i])
|
||||
}
|
||||
|
||||
#Effort to use R vectorization select data greater than 2 Standard deviations.
|
||||
#R can't handle variables in the...$(varible). The column names must be explicit. Just another R issue of thoughtlessness.
|
||||
#REMcGT2<- REMcRdy[abs(REMcRdy$(REMcRdyLabels[3]))>=2 |abs(REMcRdy$REMcRdyLabels[4])>=2 |abs(REMcRdy$REMcRdyLabels[5])>=2 |abs(REMcRdy$REMcRdyLabels[6])>=2]
|
||||
#another failed R work-around
|
||||
#REMcGT2<- REMcRdy[abs(REMcRdy[,3])>=2 | abs(REMcRdy[,4])>=2 | abs(REMcRdy[,5])>=2 | abs(REMcRdy[,6])>=2]
|
||||
#A simple task made difficult by R
|
||||
#Well R doesn't handle loops well but I may have to try some extreme measures.
|
||||
#a Fundamental reason RisFuckedUp StackOverflow ...$ and everything else (for, if, +,^,...) that is treated as a function can not accept argument which must be evaluated.
|
||||
#Therefore just another reason R is shit! This alone should qualify the language to the garbage bin.
|
||||
#Working in R is a constant battle to workaround its shortsighted deficits.
|
||||
#REMcGT2<- REMcRdy[abs(REMcRdy$(REMcRdyLabels[3]))>=2 |abs(REMcRdy$REMcRdyLabels[4])>=2 |abs(REMcRdy$REMcRdyLabels[5])>=2 |abs(REMcRdy$REMcRdyLabels[6])>=2,]
|
||||
#Vectorization with variable adaptation of Sean's string explicit code will not work because R cannot accept variables. Piss poor language.
|
||||
#So what the hell to do???
|
||||
#Basically we just need to 'OR' all the column vectors that are >=2 'True' and let that be a binary vector to apply across the data.frame REMcRdy
|
||||
#R doesn't allow a vector of vectors!
|
||||
#I guess the only way to do this simple task in R is to create a set of explicit variables,
|
||||
#then fill each variable with the binary results, next 'OR those results and use that vector of bolean results to select values from data.frame.
|
||||
#This is totally stupid crazy. But this is R.
|
||||
#R violates all the fundamental rules of automation. R Requires specific explicit parameters.
|
||||
#Since the largest REMc is four experiments make eight discrete vectors(K and L for each of the experiments).
|
||||
#The totally stupid work-around for R
|
||||
|
||||
Vec1= NA
|
||||
Vec2= NA
|
||||
Vec3= NA
|
||||
Vec4= NA
|
||||
Vec5= NA
|
||||
Vec6= NA
|
||||
Vec7= NA
|
||||
Vec8= NA
|
||||
|
||||
if(length(REMcRdy)== 6){
|
||||
Vec1= abs(REMcRdy[,3])>=std
|
||||
Vec2= abs(REMcRdy[,4])>=std
|
||||
Vec3= abs(REMcRdy[,5])>=std
|
||||
Vec4= abs(REMcRdy[,6])>=std
|
||||
bolVec= Vec1 | Vec2
|
||||
REMcRdyGT2= REMcRdy[bolVec,1:2]
|
||||
REMcRdyGT2[ ,3:6]= REMcRdy[bolVec,3:6]
|
||||
shiftOnlyGT2= shiftOnly[bolVec,1:2]
|
||||
shiftOnlyGT2[ ,3:6]= shiftOnly[bolVec,3:6]
|
||||
}
|
||||
if(length(REMcRdy)== 8){
|
||||
Vec1= abs(REMcRdy[,3])>=std
|
||||
Vec2= abs(REMcRdy[,4])>=std
|
||||
Vec3= abs(REMcRdy[,5])>=std
|
||||
Vec4= abs(REMcRdy[,6])>=std
|
||||
Vec5= abs(REMcRdy[,7])>=std
|
||||
Vec6= abs(REMcRdy[,8])>=std
|
||||
bolVec= Vec1 | Vec2 |Vec3
|
||||
REMcRdyGT2= REMcRdy[bolVec,1:2]
|
||||
REMcRdyGT2[ ,3:8]= REMcRdy[bolVec,3:8]
|
||||
shiftOnlyGT2= shiftOnly[bolVec,1:2]
|
||||
shiftOnlyGT2[ ,3:8]= shiftOnly[bolVec,3:8]
|
||||
}
|
||||
if(length(REMcRdy)== 10){
|
||||
Vec1= abs(REMcRdy[,3])>=std
|
||||
Vec2= abs(REMcRdy[,4])>=std
|
||||
Vec3= abs(REMcRdy[,5])>=std
|
||||
Vec4= abs(REMcRdy[,6])>=std
|
||||
Vec5= abs(REMcRdy[,7])>=std
|
||||
Vec6= abs(REMcRdy[,8])>=std
|
||||
Vec7= abs(REMcRdy[,9])>=std
|
||||
Vec8= abs(REMcRdy[,10])>=std
|
||||
bolVec= Vec1 | Vec2 |Vec3 |Vec4|Vec5|Vec6|Vec7|Vec8
|
||||
REMcRdyGT2= REMcRdy[bolVec,1:2]
|
||||
REMcRdyGT2[ ,3:10]= REMcRdy[bolVec,3:10]
|
||||
shiftOnlyGT2= shiftOnly[bolVec,1:2]
|
||||
shiftOnlyGT2[ ,3:10]= shiftOnly[bolVec,3:10]
|
||||
}
|
||||
|
||||
|
||||
if(std!=0){
|
||||
REMcRdy= REMcRdyGT2 #[,2:length(REMcRdyGT2)]
|
||||
shiftOnly= shiftOnlyGT2 #[,2:length(shiftOnlyGT2)]
|
||||
}
|
||||
|
||||
if(std==0){
|
||||
REMcRdy= REMcRdy #[,2:length(REMcRdy)]
|
||||
shiftOnly= shiftOnly #[,2:length(shiftOnly)]
|
||||
}
|
||||
#Yet again R creates a problem, placing hidden "" around the header names. The following
|
||||
# is intended to remove those quote so that the "" donot blow up the Java REMc.
|
||||
#Use ,quote=F in the write.csv statement to fix R output file.
|
||||
|
||||
print(paste("SD=",std))
|
||||
print(getwd())
|
||||
#write.csv(combI,file = file.path(outDir,"CombinedKLzscores.csv"),row.names = FALSE)
|
||||
write.csv(REMcRdy,file = file.path(outDir,"REMcRdy_lm_only.csv"),row.names = FALSE, quote=F)
|
||||
write.csv(shiftOnly,file = file.path(outDir,"Shift_only.csv"),row.names = FALSE, quote=F)
|
||||
|
||||
#LabelStd <- read.table(file= "./Parameters.csv",stringsAsFactors = FALSE,sep= ",")
|
||||
pwd=getwd()
|
||||
print(getwd)
|
||||
|
||||
LabelStd<- read.csv(file= "../Code/StudyInfo.csv",stringsAsFactors = FALSE)
|
||||
print(std)
|
||||
LabelStd[,4]= as.numeric(std)
|
||||
write.csv(LabelStd,file="../Code/Parameters.csv",row.names = FALSE)
|
||||
write.csv(LabelStd,file="../Code/StudyInfo.csv",row.names = FALSE)
|
||||
cat(paste("Standard Deviation Value was set as", std, "\n"))
|
||||
340
workflow/.old/ScriptTemplates/PairwiseK_lbl.R
Normal file
340
workflow/.old/ScriptTemplates/PairwiseK_lbl.R
Normal file
@@ -0,0 +1,340 @@
|
||||
#4. GTA K Pairwise Compare
|
||||
#Sean's Arg convention for info only
|
||||
#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
|
||||
#Arg1 is Average_GOTerms_All_1.csv
|
||||
#Arg2 is the name to give GTA results 1
|
||||
#Arg3 is Average_GOTerms_All2.csv
|
||||
#Arg4 is the name to give GTA results 2
|
||||
#Arg5 is the directory to put the files into
|
||||
#Arg 1 is the GTA results 1
|
||||
#Arg 2 is the name of GTA results 1 to print in the results
|
||||
#Arg 3 is GTA results 3
|
||||
#Arg 4 is the name of GTA results 2 to print in the results
|
||||
#Arg 4 is the name of GTA results 2 to print in the results
|
||||
#arg 5 is the directory to put the results into (and create that directory if needed)
|
||||
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
Args <- commandArgs(TRUE)
|
||||
expNm= Args[1] #"Exp3"
|
||||
expNm[2]= Args[2] #"Exp4"
|
||||
|
||||
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
|
||||
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
|
||||
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
labels <- read.delim("StudyInfo.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
Name1 <- labels[expNumber1,1] #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
Name2 <- labels[expNumber2,1] #ssArg4
|
||||
Wstudy= getwd()
|
||||
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
|
||||
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
|
||||
|
||||
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
|
||||
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
|
||||
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
|
||||
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
|
||||
#dir.create(outPathGTAcompare[1])
|
||||
dir.create(pairDirL) #(outPathGTAcompare[1])
|
||||
dir.create(pairDirK) #(outPathGTAcompare[2])
|
||||
|
||||
|
||||
#define the output path (as fourth argument from Rscript)
|
||||
outputpath <- pairDirK #outPathGTAcompare[2] #Args[5]
|
||||
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size= unit(0.2, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
library(scales)
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size= unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
#1
|
||||
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
|
||||
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("127")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#2
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
|
||||
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
|
||||
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
|
||||
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
|
||||
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
|
||||
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
|
||||
|
||||
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#2-170")
|
||||
#2
|
||||
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#3
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
|
||||
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#3")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#4
|
||||
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
|
||||
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#4")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#5
|
||||
#only output GTA terms where average score is still above 2 after subtracting the SD
|
||||
#Z1 will ID aggravators, Z2 alleviators
|
||||
Z1 <- X
|
||||
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
|
||||
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
|
||||
|
||||
Z2 <- X
|
||||
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
|
||||
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
|
||||
|
||||
|
||||
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
|
||||
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
|
||||
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
|
||||
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
|
||||
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)),]
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#5")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#6
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#6")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#7
|
||||
X_abovethreshold$X1_Rank <- NA
|
||||
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
|
||||
X_abovethreshold$X2_Rank <- NA
|
||||
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
|
||||
|
||||
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#7")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#8
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#8")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
print("write csv files")
|
||||
write.csv(x=X,file = paste(getwd(),"/",outputpath,"/","All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
|
||||
#End of GTA Pairwise compare for K values
|
||||
632
workflow/.old/ScriptTemplates/PairwiseLK.R
Normal file
632
workflow/.old/ScriptTemplates/PairwiseLK.R
Normal file
@@ -0,0 +1,632 @@
|
||||
#4.This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
|
||||
#Arg1 is the first Experiment or a user ordered pair (i.e., Exp3)
|
||||
#Arg2 is the second Experiment or a user ordered pair (i.e., Exp4)
|
||||
#Note user can put whatever names they wish for the Canonical Exp1..4 in the ../Code/ExpLabels.txt spread sheet
|
||||
#which correlates the canonical Exp Name with the user stylized label for plots and output files.
|
||||
|
||||
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
Args <- commandArgs(TRUE)
|
||||
|
||||
expNm= Args[1] #i.e., "Exp3"
|
||||
expNm[2]= Args[2] #i.e., "Exp4"
|
||||
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
|
||||
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
|
||||
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
Labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
|
||||
Name1 <- Labels[expNumber1,2] #ssArg2 These are now supplied by Code/ExpLabels.csv which is user edited
|
||||
Name2 <- Labels[expNumber2,2] #ssArg4
|
||||
Wstudy= getwd()
|
||||
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
|
||||
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
|
||||
|
||||
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
|
||||
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
|
||||
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
|
||||
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
|
||||
#dir.create(outPathGTAcompare[1])
|
||||
dir.create(pairDirL) #(outPathGTAcompare[1])
|
||||
dir.create(pairDirK) #(outPathGTAcompare[2])
|
||||
|
||||
###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
|
||||
#define the output path (as fourth argument from Rscript)
|
||||
outputpath <- pairDirL #outPathGTAcompare[1] #Args[5]
|
||||
#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
|
||||
print("39")
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size= unit(0.2, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
library(scales)
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size= unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
|
||||
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
|
||||
print(117)
|
||||
|
||||
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("130")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
|
||||
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
|
||||
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
|
||||
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
|
||||
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
|
||||
print("155")
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
|
||||
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
|
||||
#pgg
|
||||
print("#2-174")
|
||||
#2
|
||||
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
|
||||
#3
|
||||
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#3")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
#4
|
||||
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
|
||||
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#4")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
|
||||
#only output GTA terms where average score is still above 2 after subtracting the SD
|
||||
#Z1 will ID aggravators, Z2 alleviators
|
||||
Z1 <- X
|
||||
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
|
||||
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
|
||||
|
||||
Z2 <- X
|
||||
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
|
||||
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
|
||||
|
||||
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
|
||||
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
|
||||
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
|
||||
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
|
||||
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
|
||||
#5
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)),]
|
||||
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#5")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
#6
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#6")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
X_abovethreshold$X1_Rank <- NA
|
||||
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
|
||||
X_abovethreshold$X2_Rank <- NA
|
||||
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
|
||||
|
||||
#7
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#7")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
#8
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#8")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
print("write csv files L")
|
||||
write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
#End of L GTA Pairwise Compare
|
||||
|
||||
|
||||
|
||||
###########BEGIN PAIRWISE K-----KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
|
||||
#define the output path (as fourth argument from Rscript)
|
||||
outputpath <- pairDirK #outPathGTAcompare[2] #Args[5]
|
||||
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size= unit(0.2, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
library(scales)
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size= unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
#1
|
||||
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
|
||||
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("127")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#2
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
|
||||
X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
|
||||
X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
|
||||
Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
|
||||
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
|
||||
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
|
||||
|
||||
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#2-170")
|
||||
#2
|
||||
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#3
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
|
||||
plotly_path <- paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#3")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#4
|
||||
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
|
||||
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#4")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#5
|
||||
#only output GTA terms where average score is still above 2 after subtracting the SD
|
||||
#Z1 will ID aggravators, Z2 alleviators
|
||||
Z1 <- X
|
||||
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
|
||||
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
|
||||
|
||||
Z2 <- X
|
||||
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
|
||||
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
|
||||
|
||||
|
||||
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
|
||||
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
|
||||
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
|
||||
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
|
||||
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)),]
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#5")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#6
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#6")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#7
|
||||
X_abovethreshold$X1_Rank <- NA
|
||||
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
|
||||
X_abovethreshold$X2_Rank <- NA
|
||||
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
|
||||
|
||||
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#7")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
#8
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(getwd(),"/",outputpath,"/","Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#8")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirK,fname))
|
||||
|
||||
print("write csv files")
|
||||
write.csv(x=X,file = paste(getwd(),"/",outputpath,"/","All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
|
||||
#End of GTA Pairwise compare for K values
|
||||
332
workflow/.old/ScriptTemplates/PairwiseL_lbl.R
Normal file
332
workflow/.old/ScriptTemplates/PairwiseL_lbl.R
Normal file
@@ -0,0 +1,332 @@
|
||||
#4.This R script performs GTA L Pairwise Compares for user specified pairs of Experiments
|
||||
#Arg1 is the first Experiment or a user ordered pair (i.e., Exp3)
|
||||
#Arg2 is the second Experiment or a user ordered pair (i.e., Exp4)
|
||||
#Note user can put whatever names they wish for the Canonical Exp1..4 in the ../Code/ExpLabels.txt spread sheet
|
||||
#which correlates the canonical Exp Name with the user stylized label for plots and output files.
|
||||
|
||||
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(extrafont)
|
||||
library(grid)
|
||||
library(ggthemes)
|
||||
Args <- commandArgs(TRUE)
|
||||
|
||||
expNm= Args[1] #i.e., "Exp3"
|
||||
expNm[2]= Args[2] #i.e., "Exp4"
|
||||
expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
|
||||
expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
|
||||
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
labels <- read.delim("StudyInfo.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
Name1 <- labels[expNumber1,1] #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
Name2 <- labels[expNumber2,1] #ssArg4
|
||||
Wstudy= getwd()
|
||||
input_file1 <- paste0("../GTAresults/",expNm[1],"/Average_GOTerms_All.csv" ) #Args[1]
|
||||
input_file2 <- paste0("../GTAresults/",expNm[2],"/Average_GOTerms_All.csv" ) #Args[3]
|
||||
|
||||
pairDirL= paste0("../GTAresults/","PairwiseCompareL_",expNm[1],"-",expNm[2])
|
||||
pairDirK= paste0("../GTAresults/","PairwiseCompareK_",expNm[1],"-",expNm[2])
|
||||
outPathGTAcompare= "../GTAresults/PairwiseCompareL" #paste0(Wstudy,"/GTAresults/PairwiseCompareL)
|
||||
outPathGTAcompare[2]= "../GTAresults/PairwiseCompareK" #paste0(Wstudy,"/GTAresults/PairwiseCompareK")
|
||||
#dir.create(outPathGTAcompare[1])
|
||||
dir.create(pairDirL) #(outPathGTAcompare[1])
|
||||
dir.create(pairDirK) #(outPathGTAcompare[2])
|
||||
|
||||
###########BEGIN PAIRWISE L-----LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
|
||||
#define the output path (as fourth argument from Rscript)
|
||||
outputpath <- pairDirL #outPathGTAcompare[1] #Args[5]
|
||||
#outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
|
||||
print("39")
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size= unit(0.2, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
library(scales)
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size= unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
|
||||
X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
|
||||
print(117)
|
||||
|
||||
X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("130")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOntology.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
|
||||
#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
|
||||
X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
|
||||
X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
|
||||
Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
|
||||
X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
|
||||
print("155")
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg,]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg,]$Overlap_Avg <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
|
||||
|
||||
gg <- ggplot(data = X,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
|
||||
#pgg
|
||||
print("#2-174")
|
||||
#2
|
||||
fname <- paste("/Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
|
||||
#3
|
||||
gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#3")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
#4
|
||||
X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
|
||||
gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#4")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
|
||||
#only output GTA terms where average score is still above 2 after subtracting the SD
|
||||
#Z1 will ID aggravators, Z2 alleviators
|
||||
Z1 <- X
|
||||
Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
|
||||
Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
|
||||
|
||||
Z2 <- X
|
||||
Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
|
||||
Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
|
||||
|
||||
X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
|
||||
X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
|
||||
|
||||
X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
|
||||
X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
|
||||
|
||||
Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
|
||||
Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
|
||||
X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
|
||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
|
||||
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name1,"Specific_Deletion_Suppresors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Enhancers",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg,]$Overlap <- paste(Name2,"Specific_Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg,]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg,]$Overlap <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_"))
|
||||
|
||||
#5
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)),]
|
||||
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#5")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
#6
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#6")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
X_abovethreshold$X1_Rank <- NA
|
||||
X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
|
||||
X_abovethreshold$X2_Rank <- NA
|
||||
X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
|
||||
|
||||
#7
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#7")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
#8
|
||||
gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) +
|
||||
xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
|
||||
geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) +
|
||||
geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
|
||||
ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
|
||||
theme_Publication_legend_right()
|
||||
pdf(paste(outputpath,"/","Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
print("#8")
|
||||
fname <- paste("Scatter_lm_GTA_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
|
||||
print(fname)
|
||||
htmlwidgets::saveWidget(pgg, file.path(getwd(),fname))
|
||||
file.rename(from = file.path(getwd(),fname), to = file.path(pairDirL,fname))
|
||||
|
||||
print("write csv files L")
|
||||
write.csv(x=X,file = paste(outputpath,"/All_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
write.csv(x=X_abovethreshold,file = paste(getwd(),"/",outputpath,"/","AboveThreshold_GTA_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
|
||||
#End of L GTA Pairwise Compare
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
# Finding correlation between removing_bad_spots_at_start and Sean's results for YOR1_dF
|
||||
# Intended to be run in RStudio, setting working directory to source code
|
||||
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
# Read in the data
|
||||
|
||||
sean_data <- read.csv("ZScores_Interaction_Sean.csv",stringsAsFactors = FALSE) SS_ZScores_Interaction.csv
|
||||
remy_data <- read.csv("ZScores_Interaction_Remy.csv",stringsAsFactors = FALSE) JRcurated_ZScores_Interaction.csv
|
||||
|
||||
# We only care about L and K z scores for now, filter out other
|
||||
# stuff. Then, remove NAs; then, sort so they both match up.
|
||||
|
||||
sean_data_filtered <- as_tibble(sean_data)
|
||||
sean_data_filtered <- select(sean_data_filtered,OrfRep,Z_lm_L,Z_lm_K)
|
||||
sean_data_filtered <- filter(sean_data_filtered,!is.na(Z_lm_L),!is.na(Z_lm_K))
|
||||
sean_data_filtered <- arrange(sean_data_filtered,OrfRep)
|
||||
|
||||
remy_data_filtered <- as_tibble(remy_data)
|
||||
remy_data_filtered <- select(remy_data_filtered,OrfRep,Z_lm_L,Z_lm_K)
|
||||
remy_data_filtered <- filter(remy_data_filtered,!is.na(Z_lm_L),!is.na(Z_lm_K))
|
||||
remy_data_filtered <- arrange(remy_data_filtered,OrfRep)
|
||||
|
||||
# Now everything is lined up, do linear model of trying to predict
|
||||
# Sean L zscore with Remy L zscore, and make a plot
|
||||
|
||||
l_linear_model = lm(sean_data_filtered$Z_lm_L ~ remy_data_filtered$Z_lm_L)
|
||||
summary(l_linear_model)
|
||||
l_data = as.data.frame(cbind(remy_data_filtered$Z_lm_L,sean_data_filtered$Z_lm_L)) # Remy is V1 column, Sean is V2
|
||||
#View(l_data)
|
||||
l_gg <- ggplot(data=l_data,aes(x=V1,y=V2)) + labs(x="Remy_L",y="Sean_L")+ geom_point()
|
||||
plot(l_gg)
|
||||
|
||||
|
||||
# Repeat for K zscore
|
||||
|
||||
k_linear_model = lm(sean_data_filtered$Z_lm_K ~ remy_data_filtered$Z_lm_K)
|
||||
summary(k_linear_model)
|
||||
k_data = as.data.frame(cbind(remy_data_filtered$Z_lm_K,sean_data_filtered$Z_lm_K)) # Remy is V1 column, Sean is V2
|
||||
#View(l_data)
|
||||
l_gg <- ggplot(data=k_data,aes(x=V1,y=V2)) + labs(x="Remy_K",y="Sean_K")+ geom_point()
|
||||
plot(l_gg)
|
||||
765
workflow/.old/ScriptTemplates/TSHeatmaps5dev2.R
Normal file
765
workflow/.old/ScriptTemplates/TSHeatmaps5dev2.R
Normal file
@@ -0,0 +1,765 @@
|
||||
|
||||
library("ontologyIndex")
|
||||
library("ggplot2")
|
||||
library("RColorBrewer")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
#library("plotly")
|
||||
#library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("ggrepel")
|
||||
library(gplots)
|
||||
Wstudy=getwd()
|
||||
inputFile= 'asdf'
|
||||
inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
|
||||
inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
|
||||
inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
|
||||
|
||||
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
|
||||
labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
|
||||
lstExpNum= 'asdf'
|
||||
a=0
|
||||
#R can't index data.frame's so some stupid nasty code must be implemented
|
||||
if (file.exists(inputFile[1])){
|
||||
X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 1
|
||||
Name1 <- labels[1,1]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
|
||||
if (file.exists(inputFile[2])){
|
||||
X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 2
|
||||
Name2 <- labels[2,1]}
|
||||
if (file.exists(inputFile[3])){
|
||||
X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 3
|
||||
Name3 <- labels[3,1]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
|
||||
|
||||
if (file.exists(inputFile[4])){
|
||||
X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 4
|
||||
Name4 <- labels[4,1]}
|
||||
if (file.exists(inputFile[5])){
|
||||
X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
|
||||
a=a+1
|
||||
lstExpNum[a]= 5
|
||||
Name5 <- labels[5,1]}
|
||||
#--------------------------Standard Names-------------------------------------------------------
|
||||
#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
|
||||
#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
|
||||
#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
|
||||
#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
|
||||
#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
|
||||
#Name1= ExpNames[1]
|
||||
#Name2= ExpNames[2]
|
||||
#Name3= ExpNames[3]
|
||||
#Name4= ExpNames[4]
|
||||
#Name5= ExpNames[5]
|
||||
print('52')
|
||||
#----------------------TABLES------------------------------------------------------------
|
||||
#import standard tables used in Sean's code That should be copied to each ExpStudy
|
||||
ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo") # Args[5]
|
||||
GOtermstab_file <- "go_terms.tab"
|
||||
GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv" #All_SGD_GOTerms.csv #Args[7]
|
||||
|
||||
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
|
||||
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
#----------------------------------------------------------------------------
|
||||
|
||||
#The directory to put the results into. Sean puts this in the 'Home' directory
|
||||
subDir <- "../TermSpecificHeatmaps"
|
||||
|
||||
if (file.exists(subDir)){
|
||||
outputpath <- subDir
|
||||
} else {
|
||||
dir.create(file.path(subDir))
|
||||
}
|
||||
outputpath <- "../TermSpecificHeatmaps/" #paste0(Wstudy,"/TermSpecificHeatmaps/")
|
||||
print(outputpath)
|
||||
print('73')
|
||||
#-----------------------------------------------------------------------------------
|
||||
XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
|
||||
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
|
||||
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
|
||||
print('80')
|
||||
#rm(X,)
|
||||
#Exp1--------------------------------------------------
|
||||
if (file.exists(inputFile[1])){
|
||||
print('84')
|
||||
X1$ORF <- X1$OrfRep
|
||||
X1$ORF <- gsub("_1","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_2","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_3","",x=X1$ORF)
|
||||
X1$ORF <- gsub("_4","",x=X1$ORF)
|
||||
|
||||
X1$Score_L <- "No Effect"
|
||||
try(X1[is.na(X1$Z_lm_L),]$Score_L <- "No Growth")
|
||||
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
||||
try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
||||
|
||||
X1$Score_K <- "No Effect"
|
||||
try(X1[is.na(X1$Z_lm_K),]$Score_K <- "No Growth")
|
||||
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
||||
try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
||||
|
||||
#express the na data as 0.001 in X1 for K and L
|
||||
X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
X1[is.na(X1$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
X1$Rank_L <- rank(X1$Z_lm_L)
|
||||
X1$Rank_K <- rank(X1$Z_lm_K)
|
||||
|
||||
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
|
||||
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
|
||||
print('110')
|
||||
}
|
||||
print('112')
|
||||
#Exp2------------------------------------------------------------------------
|
||||
if (file.exists(inputFile[2])){
|
||||
X2$ORF <- X2$OrfRep
|
||||
X2$ORF <- gsub("_1","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_2","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_3","",x=X2$ORF)
|
||||
X2$ORF <- gsub("_4","",x=X2$ORF)
|
||||
|
||||
X2$Score_L <- "No Effect"
|
||||
try(X2[is.na(X2$Z_lm_L),]$Score_L <- "No Growth")
|
||||
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
||||
try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
||||
|
||||
X2$Score_K <- "No Effect"
|
||||
try(X2[is.na(X2$Z_lm_K),]$Score_K <- "No Growth")
|
||||
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
||||
try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
||||
#express the na data as 0.001 in X2
|
||||
X2[is.na(X2$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
X2[is.na(X2$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
X2$Rank_L <- rank(X2$Z_lm_L)
|
||||
X2$Rank_K <- rank(X2$Z_lm_K)
|
||||
|
||||
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
|
||||
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
|
||||
X <- cbind(X1,X2)
|
||||
print('file2-140')
|
||||
}
|
||||
print('142')
|
||||
#Exp3--------------------------------------------------------------
|
||||
if (file.exists(inputFile[3])){
|
||||
X3$ORF <- X3$OrfRep
|
||||
X3$ORF <- gsub("_1","",x=X3$ORF)
|
||||
X3$ORF <- gsub("_2","",x=X3$ORF)
|
||||
X3$ORF <- gsub("_3","",x=X3$ORF)
|
||||
X3$ORF <- gsub("_4","",x=X3$ORF)
|
||||
|
||||
X3$Score_L <- "No Effect"
|
||||
try(X3[is.na(X3$Z_lm_L),]$Score_L <- "No Growth")
|
||||
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
||||
try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
||||
|
||||
X3$Score_K <- "No Effect"
|
||||
try(X3[is.na(X3$Z_lm_K),]$Score_K <- "No Growth")
|
||||
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
||||
try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
||||
#express the na data as 0.001 in X3
|
||||
X3[is.na(X3$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
X3[is.na(X3$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
X3$Rank_L <- rank(X3$Z_lm_L)
|
||||
X3$Rank_K <- rank(X3$Z_lm_K)
|
||||
|
||||
X3 <- X3[order(X3$OrfRep,decreasing = FALSE),]
|
||||
colnames(X3) <- paste(colnames(X3),"_X3",sep="")
|
||||
X <- cbind(X,X3)
|
||||
}
|
||||
|
||||
#Exp4
|
||||
if (file.exists(inputFile[4])){
|
||||
X4$ORF <- X4$OrfRep
|
||||
X4$ORF <- gsub("_1","",x=X4$ORF)
|
||||
X4$ORF <- gsub("_2","",x=X4$ORF)
|
||||
X4$ORF <- gsub("_3","",x=X4$ORF)
|
||||
X4$ORF <- gsub("_4","",x=X4$ORF)
|
||||
|
||||
X4$Score_L <- "No Effect"
|
||||
try(X4[is.na(X4$Z_lm_L),]$Score_L <- "No Growth")
|
||||
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
||||
try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
||||
|
||||
X4$Score_K <- "No Effect"
|
||||
try(X4[is.na(X4$Z_lm_K),]$Score_K <- "No Growth")
|
||||
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
||||
try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
||||
#express the na data as 0.001 in X4
|
||||
X4[is.na(X4$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
X4[is.na(X4$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
X4$Rank_L <- rank(X4$Z_lm_L)
|
||||
X4$Rank_K <- rank(X4$Z_lm_K)
|
||||
|
||||
X4 <- X4[order(X4$OrfRep,decreasing = FALSE),]
|
||||
colnames(X4) <- paste(colnames(X4),"_X4",sep="")
|
||||
X <- cbind(X,X4)
|
||||
}
|
||||
|
||||
#Exp5--------------------------------------------------------------
|
||||
if (file.exists(inputFile[5])){
|
||||
X5$ORF <- X5$OrfRep
|
||||
X5$ORF <- gsub("_1","",x=X5$ORF)
|
||||
X5$ORF <- gsub("_2","",x=X5$ORF)
|
||||
X5$ORF <- gsub("_3","",x=X5$ORF)
|
||||
X5$ORF <- gsub("_4","",x=X5$ORF)
|
||||
|
||||
X5$Score_L <- "No Effect"
|
||||
try(X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth")
|
||||
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer")
|
||||
try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor")
|
||||
|
||||
X5$Score_K <- "No Effect"
|
||||
try(X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth")
|
||||
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor")
|
||||
try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer")
|
||||
#express the na data as 0.001 in X5
|
||||
X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
|
||||
X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
|
||||
|
||||
X5$Rank_L <- rank(X5$Z_lm_L)
|
||||
X5$Rank_K <- rank(X5$Z_lm_K)
|
||||
|
||||
X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
|
||||
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
|
||||
X <- cbind(X,X5)
|
||||
}
|
||||
print('after5-229')
|
||||
X$ORF <- X$OrfRep_X1
|
||||
part1=1
|
||||
|
||||
if (file.exists(inputFile[1])& file.exists(inputFile[2])){
|
||||
print('234 ifFile2')
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
|
||||
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
||||
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
||||
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
||||
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
||||
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
||||
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
||||
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" ]
|
||||
|
||||
X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)]
|
||||
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
}
|
||||
|
||||
|
||||
if (file.exists(inputFile[3])){
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
|
||||
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
||||
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
||||
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
||||
|
||||
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
||||
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
||||
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
||||
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
||||
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
||||
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
||||
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" ]
|
||||
#Reorder columns
|
||||
X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)] #Three
|
||||
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
}
|
||||
|
||||
if (file.exists(inputFile[4])){
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
X$ORF <- gsub("_3","",x=X$ORF)
|
||||
|
||||
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
||||
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
||||
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
||||
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
|
||||
|
||||
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
||||
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
||||
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
||||
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
||||
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
|
||||
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
||||
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
||||
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
|
||||
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" ]
|
||||
#Reorder columns
|
||||
X_heatmap <- X_heatmap[,c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)] #Four
|
||||
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
}
|
||||
|
||||
|
||||
if (file.exists(inputFile[5])){
|
||||
X$ORF <- gsub("_1","",x=X$ORF)
|
||||
X$ORF <- gsub("_2","",x=X$ORF)
|
||||
X$ORF <- gsub("_3","",x=X$ORF)
|
||||
X$ORF <- gsub("_4","",x=X$ORF)
|
||||
|
||||
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
|
||||
try(X[X$Gene_X2 == "",]$Gene_X2 <- X[X$Gene_X2 == "",]$OrfRep_X2)
|
||||
try(X[X$Gene_X3 == "",]$Gene_X3 <- X[X$Gene_X3 == "",]$OrfRep_X3)
|
||||
try(X[X$Gene_X4 == "",]$Gene_X4 <- X[X$Gene_X4 == "",]$OrfRep_X4)
|
||||
try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
|
||||
|
||||
X_heatmap <- X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
|
||||
colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
|
||||
colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
|
||||
colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
|
||||
colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
|
||||
colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
|
||||
colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
|
||||
colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
|
||||
colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
|
||||
colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
|
||||
colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
|
||||
#Reorder columns
|
||||
X_heatmap <- X_heatmap[,c(22,1,4,5,8,9,12,13,16,17,20,21,2,3,6,7,10,11,14,15,18,19)]
|
||||
colnames(X_heatmap) <- gsub(pattern = "X1",replacement = Name1,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X2",replacement = Name2,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X3",replacement = Name3,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X4",replacement = Name4,colnames(X_heatmap))
|
||||
colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name5,colnames(X_heatmap))
|
||||
|
||||
colnames(X_heatmap)[2] <- "Gene"
|
||||
}
|
||||
|
||||
|
||||
#-----------------------------------------------------------------------------------------
|
||||
|
||||
#theme elements for plots
|
||||
theme_Publication <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size= unit(0.2, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
library(scales)
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
|
||||
}
|
||||
|
||||
|
||||
theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
|
||||
(theme_foundation(base_size=base_size, base_family=base_family)
|
||||
+ theme(plot.title = element_text(face = "bold",
|
||||
size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold",size = rel(1)),
|
||||
axis.title.y = element_text(angle=90,vjust =2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.text = element_text(),
|
||||
axis.line = element_line(colour="black"),
|
||||
axis.ticks = element_line(),
|
||||
panel.grid.major = element_line(colour="#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size= unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face="italic"),
|
||||
plot.margin=unit(c(10,5,5,5),"mm"),
|
||||
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
|
||||
strip.text = element_text(face="bold")
|
||||
))
|
||||
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...){
|
||||
discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...){
|
||||
discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#www.geneontology.org/ontology/gene_ontology_edit.obo file
|
||||
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
print(Ontology)
|
||||
#all ORFs associated with GO term
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
print('429')
|
||||
|
||||
|
||||
#Gene_Association is the gene association to GO term file
|
||||
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
|
||||
|
||||
|
||||
#Terms is the GO term list jwr moved up to TAABLES
|
||||
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------------------------------------
|
||||
#BIG LOOP BIG LOOP ------------------------------------------------------
|
||||
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
|
||||
for(s in 1:dim(XX3)[1]){
|
||||
#Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
|
||||
#Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
|
||||
#GO_ID_Arg <- "GO:0006325"
|
||||
GO_ID_Arg_loop <- as.character(XX3[s,1])
|
||||
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
|
||||
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
|
||||
#only make plots if parent term has fewer than 500 children
|
||||
|
||||
print('for(s in 1:dim(XX3)[1]){ -454')
|
||||
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
|
||||
if(length(GOTerm_parent) > 100){
|
||||
#print(length(GOTerm_parent))
|
||||
next()
|
||||
}
|
||||
|
||||
|
||||
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
|
||||
if(Parent_Size < 2){
|
||||
next()
|
||||
}
|
||||
if(Parent_Size > 2000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
|
||||
print('pdf2000 469')
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
if(Parent_Size >= 1000 && Parent_Size <= 2000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 500 && Parent_Size <= 1000){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 200 && Parent_Size <= 500){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 100 && Parent_Size <= 200){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 60 && Parent_Size <= 100){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 30 && Parent_Size <= 60){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size >= 3 && Parent_Size <= 30){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
if(Parent_Size == 2){
|
||||
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
|
||||
for(i in 1:length(GOTerm_parent)){
|
||||
GO_Term <- GOTerm_parent[i]
|
||||
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
|
||||
GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
|
||||
#Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
|
||||
All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
|
||||
Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
|
||||
X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
|
||||
if(dim(Genes_Annotated_to_Term)[1] > 2){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
|
||||
try(heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
|
||||
ylab = "Gene",
|
||||
cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
|
||||
keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=GO_Term_Name,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(Genes_Annotated_to_Term$Gene)))
|
||||
}
|
||||
print('740')
|
||||
}
|
||||
dev.off()
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user