Add gta module
This commit is contained in:
@@ -1,59 +1,70 @@
|
||||
# GTA (GoTermAveraging). Start with the working directory set to /Code; all paths are relative to /Code.
|
||||
# Your output may not be reproducible, because org.Sc.sgd.db is loaded from the Bioconductor R library and changes over time.
|
||||
# Loops through the experiments involved in the study. JWR
|
||||
# Remember the directory the script was started from.
Wstudy <- getwd()

# Root directory that receives one results subdirectory per experiment.
outputPathGTA <- "../GTAresults"

# Candidate experiments and the interaction Z-score file each one is expected
# to provide (paths are relative to the working directory).
exp_ids <- paste0("Exp", 1:4)
zscore_paths <- file.path("..", exp_ids, "ZScores", "ZScores_Interaction.csv")

# Keep only the experiments whose Z-score file exists. Building the vectors by
# logical subsetting means:
#   * inputFile/expName are always defined, even when Exp1 is absent
#     (previously `inputFile[2] <- ...` failed with "object not found");
#   * a missing experiment in the middle no longer leaves NA entries behind.
# inputFile[i] and expName[i] always describe the same experiment.
found <- file.exists(zscore_paths)
inputFile <- zscore_paths[found]
expName <- exp_ids[found]

# Create the per-experiment output directories. recursive = TRUE also creates
# the ../GTAresults parent when it is missing; showWarnings = FALSE keeps
# re-runs quiet when a directory already exists.
for (result_dir in file.path(outputPathGTA, expName)) {
  dir.create(result_dir, recursive = TRUE, showWarnings = FALSE)
}
|
||||
#dir.create(outPathGTA)
|
||||
#!/usr/bin/env R
|
||||
# GTA (GoTermAveraging)
|
||||
# Your output may not be reproducible, because org.Sc.sgd.db is loaded from the Bioconductor R library and changes over time.
|
||||
#
|
||||
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
||||
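# NOTE: The script takes the following positional arguments:
# 1. Experiment name (used as the output subdirectory name)
# 2. Path to the Z-scores file (OPTIONAL)
# 3. Path to SGD terms file (go_terms.tab) (OPTIONAL)
# 4. Path to SGD features file (gene_association.sgd) (OPTIONAL)
# 5. Output directory (OPTIONAL)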
|
||||
|
||||
library("stringr")
|
||||
library("org.Sc.sgd.db")
|
||||
library("plyr")
|
||||
# Build in command-line arguments to apply this code to a given !!results sheet
|
||||
|
||||
# Default path to the SGD GO terms table; it is read with read.delim() inside the experiment loop.
SGD_Terms_file <- "../Code/go_terms.tab" #ArgsScore[2]
|
||||
#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
|
||||
# Default path to the SGD gene association file; it is read with read.delim() inside the experiment loop.
SGD_features_file <- "../Code/gene_association.sgd" #ArgsScore[3]
|
||||
# Parse arguments
#
# Positional command-line arguments:
#   1. experiment name (used as the output subdirectory name)
#   2. Z-scores file                 (optional)
#   3. SGD GO terms file             (optional)
#   4. SGD gene association file     (optional)
#   5. output directory              (optional)
args <- commandArgs(TRUE)

# R and RStudio have issues: the for loop (189) seemed to fail to evaluate the
# paste (or paste0) that builds the inputFile inside the loop, and bailed as
# the second iteration began. The crude fix below seems to have alleviated the
# failure-to-loop problem, at least for now. Also, for some annoying reason,
# the underscores between words are sometimes not shown when they exist.
# No rhyme or reason!

# Return the i-th command-line argument, or `default` when fewer than i
# arguments were supplied.
arg_or_default <- function(args, i, default) {
  if (length(args) >= i) {
    args[i]
  } else {
    default
  }
}

# NA when the script is started without any arguments.
exp_name <- args[1]

# The previous checks used `length(args) > i` before reading `args[i]`, so the
# last argument on the command line was always ignored; `>= i` is the correct
# bound and is what arg_or_default() applies.
zscores_file <- arg_or_default(args, 2, "ZScores/ZScores_Interaction.csv")
sgd_terms_file <- arg_or_default(args, 3, "../Code/go_terms.tab")
# https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
sgd_features_file <- arg_or_default(args, 4, "../Code/gene_association.sgd")
output_dir <- arg_or_default(args, 5, "../GTAresults")
|
||||
|
||||
|
||||
|
||||
# # Set SGDgeneList file path
|
||||
# if (length(args) > 4) {
|
||||
# SGDgeneList <- args[4]
|
||||
# } else {
|
||||
# SGDgeneList <- "../Code/SGD_features.tab"
|
||||
|
||||
|
||||
#Begin for loop for experiments in this study-----------------ZScores_Interaction.csv
|
||||
for(m in 1:length(inputFile)){
|
||||
for(m in 1:length(zscores_file)){
|
||||
|
||||
#inputFile <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1]
|
||||
X <- read.csv(file = inputFile[m],stringsAsFactors=FALSE,header = TRUE)
|
||||
#zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="") #ArgsScore[1]
|
||||
X <- read.csv(file = zscores_file[m],stringsAsFactors=FALSE,header = TRUE)
|
||||
|
||||
if(colnames(X)[1] == "OrfRep"){
|
||||
colnames(X)[1] <- "ORF"
|
||||
}
|
||||
|
||||
#Terms is the GO term list
|
||||
Terms <- read.delim(file = SGD_Terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
Terms <- read.delim(file = sgd_terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
|
||||
#all ORFs associated with GO term
|
||||
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
|
||||
#Gene_Association is the gene association to GO term file
|
||||
Gene_Association <- read.delim(SGD_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
Gene_Association <- read.delim(sgd_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
|
||||
#Get the ORF names associated with each gene/GO term
|
||||
Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
|
||||
#Get the numeric GO ID for matching
|
||||
@@ -140,7 +151,7 @@ for(m in 1:length(inputFile)){
|
||||
X2 <- X2[,order(names(X2))]
|
||||
X2 <- X2[!is.na(X2$Z_lm_L_Avg),]
|
||||
#create output file
|
||||
write.csv(X2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All.csv",sep=""),row.names=FALSE)
|
||||
write.csv(X2,file=paste(output_dir,"/",expName[m],"/Average_GOTerms_All.csv",sep=""),row.names=FALSE)
|
||||
#remove NAs
|
||||
X3 <- X2[!is.na(X2$Z_lm_L_Avg),]
|
||||
#identify redundant GO terms
|
||||
@@ -167,21 +178,21 @@ for(m in 1:length(inputFile)){
|
||||
}
|
||||
Y1 <- unique(Y)
|
||||
|
||||
write.csv(Y1,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
|
||||
write.csv(Y1,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_All_NonRedundantTerms.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2,]
|
||||
Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
|
||||
write.csv(Y2,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv",sep=""),row.names = FALSE)
|
||||
write.csv(Y2,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_L.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y3 <- Y2[Y2$NumGenes_Avg > 2,]
|
||||
write.csv(Y3,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
write.csv(Y3,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2,]
|
||||
Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
|
||||
write.csv(Y4,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv",sep=""),row.names = FALSE)
|
||||
write.csv(Y4,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_K.csv",sep=""),row.names = FALSE)
|
||||
|
||||
Y5 <- Y4[Y4$NumGenes_Avg > 2,]
|
||||
write.csv(Y5,file=paste(outputPathGTA,"/",expName[m],"/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
write.csv(Y5,file=paste(output_dir,"/",exp_name,"/Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv",sep=""),row.names = FALSE)
|
||||
|
||||
#End of 'for loop'
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env R
|
||||
# Based on InteractionTemplate.R which is based on Sean Santos's Interaction_V5 script
|
||||
#
|
||||
# Updated 240723 Bryan C Roessler to improve file operations and portability
|
||||
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
||||
# NOTE: The script now has 4 additional OPTIONAL arguments:
|
||||
# 1. Path to input file
|
||||
# 2. /output/ directory
|
||||
|
||||
Reference in New Issue
Block a user