Finish PairwiseLK module

This commit is contained in:
2024-07-24 21:11:25 -04:00
parent 7b8ce3e7fd
commit ef449c66ff
6 changed files with 594 additions and 481 deletions

View File

@@ -1,3 +1,16 @@
#!/usr/bin/env Rscript
# Makes heat maps of multiple experiments
#
# Updated 240724 Bryan C Roessler to improve file operations and portability
# I tried to leave as much logic intact as possible, just feeding in vars in a better way
# NOTE: The script now has 7 required arguments and a variable number of input experiments
# @arg $1 string StudyInfo.csv file
# @arg $2 string gene_ontology_edit.obo file
# @arg $3 string go_terms.tab file
# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
# @arg $5 string ZScores_interaction.csv
# @arg $6 string base directory
# @arg $7 string output directory
library("ontologyIndex")
library("ggplot2")
@@ -10,93 +23,43 @@ library("extrafont")
library("stringr")
library("org.Sc.sgd.db")
library("ggrepel")
library(gplots)
Wstudy=getwd()
inputFile= 'asdf'
inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
library("gplots")
# Load arguments
args <- commandArgs(TRUE)
study_info_file <- args[1]
ontology_file <- args[2]
sgd_terms_tfile <- args[3]
all_sgd_terms_csv <- args[4]
zscores_file <- args[5]
base_dir <- args[6]
output_dir <- args[7]
study_nums <- args[8:length(args)]
#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
lstExpNum= 'asdf'
a=0
#R can't index data.frame's so some stupid nasty code must be implemented
if (file.exists(inputFile[1])){
X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 1
Name1 <- labels[1,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[2])){
X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 2
Name2 <- labels[2,2]}
if (file.exists(inputFile[3])){
X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 3
Name3 <- labels[3,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
if (file.exists(inputFile[4])){
X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 4
Name4 <- labels[4,2]}
if (file.exists(inputFile[5])){
X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
a=a+1
lstExpNum[a]= 5
Name5 <- labels[5,2]}
#--------------------------Standard Names-------------------------------------------------------
#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
#Name1= ExpNames[1]
#Name2= ExpNames[2]
#Name3= ExpNames[3]
#Name4= ExpNames[4]
#Name5= ExpNames[5]
print('52')
#----------------------TABLES------------------------------------------------------------
#import standard tables used in Sean's code That should be copied to each ExpStudy
ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo") # Args[5]
GOtermstab_file <- "go_terms.tab"
GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv" #All_SGD_GOTerms.csv #Args[7]
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE)
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#----------------------------------------------------------------------------
#The directory to put the results into. Sean puts this in the 'Home' directory
subDir <- "../TermSpecificHeatmaps"
if (file.exists(subDir)){
outputpath <- subDir
} else {
dir.create(file.path(subDir))
}
outputpath <- "../TermSpecificHeatmaps/" #paste0(Wstudy,"/TermSpecificHeatmaps/")
print(outputpath)
print('73')
#-----------------------------------------------------------------------------------
XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE)
XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
print('80')
#rm(X,)
#Exp1--------------------------------------------------
if (file.exists(inputFile[1])){
print('84')
# Load input files
for (study_num in study_nums) {
input_file <- file.path(base_dir,paste('Exp',study_num),zscores_file)
if (file.exists(input_file)) {
assign(paste(X, study_num), read.csv(file=input_file,stringsAsFactors=FALSE,header=TRUE))
assign(paste(Name, study_num), labels[study_num,2])
}
}
for (study_num in study_nums) {
eval(paste("function",study_num))
}
if (length(study_nums) > 0) {
X1$ORF <- X1$OrfRep
X1$ORF <- gsub("_1","",x=X1$ORF)
X1$ORF <- gsub("_2","",x=X1$ORF)
@@ -121,11 +84,9 @@ if (file.exists(inputFile[1])){
X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
colnames(X1) <- paste(colnames(X1),"_X1",sep="")
print('110')
}
print('112')
#Exp2------------------------------------------------------------------------
if (file.exists(inputFile[2])){
if (length(study_nums) > 1) {
X2$ORF <- X2$OrfRep
X2$ORF <- gsub("_1","",x=X2$ORF)
X2$ORF <- gsub("_2","",x=X2$ORF)
@@ -151,11 +112,9 @@ if (file.exists(inputFile[2])){
X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
colnames(X2) <- paste(colnames(X2),"_X2",sep="")
X <- cbind(X1,X2)
print('file2-140')
}
print('142')
#Exp3--------------------------------------------------------------
if (file.exists(inputFile[3])){
if (length(study_nums) > 2) {
X3$ORF <- X3$OrfRep
X3$ORF <- gsub("_1","",x=X3$ORF)
X3$ORF <- gsub("_2","",x=X3$ORF)
@@ -183,8 +142,7 @@ if (file.exists(inputFile[3])){
X <- cbind(X,X3)
}
#Exp4
if (file.exists(inputFile[4])){
if (length(study_nums) > 3) {
X4$ORF <- X4$OrfRep
X4$ORF <- gsub("_1","",x=X4$ORF)
X4$ORF <- gsub("_2","",x=X4$ORF)
@@ -212,8 +170,7 @@ if (file.exists(inputFile[4])){
X <- cbind(X,X4)
}
#Exp5--------------------------------------------------------------
if (file.exists(inputFile[5])){
if (length(study_nums) > 4) {
X5$ORF <- X5$OrfRep
X5$ORF <- gsub("_1","",x=X5$ORF)
X5$ORF <- gsub("_2","",x=X5$ORF)
@@ -240,12 +197,11 @@ if (file.exists(inputFile[5])){
colnames(X5) <- paste(colnames(X5),"_X5",sep="")
X <- cbind(X,X5)
}
print('after5-229')
X$ORF <- X$OrfRep_X1
part1=1
if (file.exists(inputFile[1])& file.exists(inputFile[2])){
print('234 ifFile2')
X$ORF <- X$OrfRep_X1
if (length(study_nums) > 1) {
X$ORF <- gsub("_1","",x=X$ORF)
try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
@@ -263,7 +219,7 @@ if (file.exists(inputFile[1])& file.exists(inputFile[2])){
}
if (file.exists(inputFile[3])){
if (length(study_nums) > 2) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
@@ -286,7 +242,7 @@ if (file.exists(inputFile[3])){
colnames(X_heatmap)[2] <- "Gene"
}
if (file.exists(inputFile[4])){
if (length(study_nums) > 3) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
@@ -315,7 +271,7 @@ if (file.exists(inputFile[4])){
}
if (file.exists(inputFile[5])){
if (length(study_nums) > 4) {
X$ORF <- gsub("_1","",x=X$ORF)
X$ORF <- gsub("_2","",x=X$ORF)
X$ORF <- gsub("_3","",x=X$ORF)
@@ -350,8 +306,6 @@ if (file.exists(inputFile[5])){
}
#-----------------------------------------------------------------------------------------
#theme elements for plots
theme_Publication <- function(base_size=14, base_family="sans") {
(theme_foundation(base_size=base_size, base_family=base_family)
@@ -434,27 +388,15 @@ scale_colour_Publication <- function(...){
}
#www.geneontology.org/ontology/gene_ontology_edit.obo file
Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
print(Ontology)
#all ORFs associated with GO term
GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
print('429')
#Gene_Association is the gene association to GO term file
#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
#Terms is the GO term list jwr moved up to TAABLES
Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------------------------------------
#BIG LOOP BIG LOOP ------------------------------------------------------
colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
for(s in 1:dim(XX3)[1]){
@@ -465,24 +407,19 @@ for(s in 1:dim(XX3)[1]){
GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
#GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
#only make plots if parent term has fewer than 500 children
print('for(s in 1:dim(XX3)[1]){ -454')
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(length(GOTerm_parent) > 100){
#print(length(GOTerm_parent))
next()
}
Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
if(Parent_Size < 2){
next()
}
if(Parent_Size > 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
print('pdf2000 469')
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -509,7 +446,7 @@ for(s in 1:dim(XX3)[1]){
}
if(Parent_Size >= 1000 && Parent_Size <= 2000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -535,7 +472,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 500 && Parent_Size <= 1000){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -561,7 +498,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 200 && Parent_Size <= 500){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -587,7 +524,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 100 && Parent_Size <= 200){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -613,7 +550,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 60 && Parent_Size <= 100){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -639,7 +576,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 30 && Parent_Size <= 60){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -679,7 +616,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size >= 3 && Parent_Size <= 30){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -718,7 +655,7 @@ for(s in 1:dim(XX3)[1]){
dev.off()
}
if(Parent_Size == 2){
pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
for(i in 1:length(GOTerm_parent)){
GO_Term <- GOTerm_parent[i]
GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -753,13 +690,7 @@ for(s in 1:dim(XX3)[1]){
#ColSideColors=ev_repeat,
labRow=as.character(Genes_Annotated_to_Term$Gene)))
}
print('740')
}
dev.off()
}
}