Finish PairwiseLK module

2024-07-24 21:11:25 -04:00
parent 7b8ce3e7fd
commit ef449c66ff
6 changed files with 594 additions and 481 deletions
--- a/workflow/templates/qhtcp/Code/TSHeatmaps5dev2.R
+++ b/workflow/templates/qhtcp/Code/TSHeatmaps5dev2.R
@@ -1,3 +1,16 @@
+#!/usr/bin/env Rscript
+# Makes heat maps of multiple experiments
+#
+# Updated 240724 Bryan C Roessler to improve file operations and portability
+# I tried to leave as much logic intact as possible, just feeding in vars in a better way
+# NOTE: The script now has 7 required arguments and a variable number of input experiments
+# @arg $1 string StudyInfo.csv file
+# @arg $2 string gene_ontology_edit.obo file
+# @arg $3 string go_terms.tab file
+# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
+# @arg $5 string ZScores_interaction.csv
+# @arg $6 string base directory
+# @arg $7 string output directory

 library("ontologyIndex")
 library("ggplot2")
@@ -10,93 +23,43 @@ library("extrafont")
 library("stringr")
 library("org.Sc.sgd.db")
 library("ggrepel")
-library(gplots)
-Wstudy=getwd()
-inputFile= 'asdf'
-inputFile[1] <- "../Exp1/ZScores/ZScores_Interaction.csv" #paste0(Wstudy,"/Exp1/ZScores/ZScores_Interaction.csv")
-inputFile[2] <- "../Exp2/ZScores/ZScores_Interaction.csv"
-inputFile[3] <- "../Exp3/ZScores/ZScores_Interaction.csv"
-inputFile[4] <- "../Exp4/ZScores/ZScores_Interaction.csv"
-inputFile[5] <- "../Exp5/ZScores/ZScores_Interaction.csv"
+library("gplots")
+
+# Load arguments
+args <- commandArgs(TRUE)
+study_info_file <- args[1]
+ontology_file <- args[2]
+sgd_terms_tfile <- args[3]
+all_sgd_terms_csv <- args[4]
+zscores_file <- args[5]
+base_dir <- args[6]
+output_dir <- args[7]
+study_nums <- args[8:length(args)]

-#labels <- read.delim("ExpLabels.txt",skip=0,as.is=T,row.names=1,strip.white=TRUE)
-#labels <- read.delim("StudyInfo.csv",skip=0,as.is=T,row.names=1,strip.white=TRUE)
-labels<- read.csv(file= "StudyInfo.csv",stringsAsFactors = FALSE)
-lstExpNum= 'asdf'
-a=0
-#R can't index data.frame's so some stupid nasty code must be implemented
-if (file.exists(inputFile[1])){
-  X1 <- (read.csv(file = inputFile[1],stringsAsFactors=FALSE,header = TRUE))
-  a=a+1
-  lstExpNum[a]= 1
-  Name1 <- labels[1,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
-                
-if (file.exists(inputFile[2])){
-  X2 <- (read.csv(file = inputFile[2],stringsAsFactors=FALSE,header = TRUE))
-  a=a+1
-  lstExpNum[a]= 2
-  Name2 <- labels[2,2]}
-if (file.exists(inputFile[3])){
-  X3 <- (read.csv(file = inputFile[3],stringsAsFactors=FALSE,header = TRUE))
-  a=a+1
-  lstExpNum[a]= 3
-  Name3 <- labels[3,2]} #ssArg2 These are now supplied by Code/ExpLabels.txt which is user edited
-                   
-if (file.exists(inputFile[4])){
-  X4 <- (read.csv(file = inputFile[4],stringsAsFactors=FALSE,header = TRUE))
-  a=a+1
-  lstExpNum[a]= 4
-  Name4 <- labels[4,2]}     
-if (file.exists(inputFile[5])){
-  X5 <- (read.csv(file = inputFile[5],stringsAsFactors=FALSE,header = TRUE))
-  a=a+1
-  lstExpNum[a]= 5
-  Name5 <- labels[5,2]} 
-#--------------------------Standard Names-------------------------------------------------------
-#ExpNames= c('Exp1','Exp2','Exp3','Exp4','Exp5')
-#expNumber1<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[1]))
-#expNumber2<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[2]))
-#expNumber3<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[3]))
-#expNumber4<- as.numeric(sub("^.*?(\\d+)$", "\\1", expNm[4]))
-#Name1= ExpNames[1]
-#Name2= ExpNames[2]
-#Name3= ExpNames[3]
-#Name4= ExpNames[4]
-#Name5= ExpNames[5] 
-print('52')
-#----------------------TABLES------------------------------------------------------------
 #import standard tables used in Sean's code That should be copied to each ExpStudy
-ontology_obo_input <- "gene_ontology_edit.obo" #paste0(Wstudy,"/Code/gene_ontology_edit.obo")        # Args[5]
-GOtermstab_file <- "go_terms.tab"
-GO_ID_Arg <-"All_SGD_GOTerms_for_QHTCPtk.csv"     #All_SGD_GOTerms.csv                #Args[7]
-
-Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
+labels<- read.csv(file=study_info_file,stringsAsFactors = FALSE)
+Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
 GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) #all ORFs associated with GO term
-Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
-#----------------------------------------------------------------------------
-
-#The directory to put the results into. Sean puts this in the 'Home' directory
-subDir <- "../TermSpecificHeatmaps" 
-
-if (file.exists(subDir)){
-  outputpath <- subDir
-} else {
-  dir.create(file.path(subDir))
-}
-outputpath <- "../TermSpecificHeatmaps/"  #paste0(Wstudy,"/TermSpecificHeatmaps/")  
-print(outputpath)
-print('73')
-#-----------------------------------------------------------------------------------
-XX3 <- read.csv(file = GO_ID_Arg,stringsAsFactors=FALSE,header = TRUE)
-
+Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+XX3 <- read.csv(file=all_sgd_terms_csv,stringsAsFactors=FALSE,header = TRUE)
 XX3[,1] <- paste("GO:",formatC(XX3[,1],width=7,flag="0"),sep="")
 XX3[,2] <- gsub(pattern = " ",replacement = "_",x = XX3[,2])
 XX3[,2] <- gsub(pattern = "/",replacement = "_",x = XX3[,2])
-print('80')
-#rm(X,)
-#Exp1--------------------------------------------------
-if (file.exists(inputFile[1])){
-  print('84')
+
+# Load input files
+for (study_num in study_nums) {
+  input_file <- file.path(base_dir,paste('Exp',study_num),zscores_file)
+  if (file.exists(input_file)) {
+    assign(paste(X, study_num), read.csv(file=input_file,stringsAsFactors=FALSE,header=TRUE))
+    assign(paste(Name, study_num), labels[study_num,2])
+  }
+}
+
+for (study_num in study_nums) {
+  eval(paste("function",study_num))
+}
+
+if (length(study_nums) > 0) {
  X1$ORF <- X1$OrfRep
  X1$ORF <- gsub("_1","",x=X1$ORF)
  X1$ORF <- gsub("_2","",x=X1$ORF)
@@ -121,11 +84,9 @@ if (file.exists(inputFile[1])){
  
  X1 <- X1[order(X1$OrfRep,decreasing = FALSE),]
  colnames(X1) <- paste(colnames(X1),"_X1",sep="")
-  print('110')  
 }
-print('112')
-#Exp2------------------------------------------------------------------------
-if (file.exists(inputFile[2])){
+
+if (length(study_nums) > 1) {
  X2$ORF <- X2$OrfRep
  X2$ORF <- gsub("_1","",x=X2$ORF)
  X2$ORF <- gsub("_2","",x=X2$ORF)
@@ -151,11 +112,9 @@ if (file.exists(inputFile[2])){
  X2 <- X2[order(X2$OrfRep,decreasing = FALSE),]
  colnames(X2) <- paste(colnames(X2),"_X2",sep="")
  X <- cbind(X1,X2)
-  print('file2-140')
 }
-print('142')
-#Exp3--------------------------------------------------------------
-if (file.exists(inputFile[3])){
+
+if (length(study_nums) > 2) {
  X3$ORF <- X3$OrfRep
  X3$ORF <- gsub("_1","",x=X3$ORF)
  X3$ORF <- gsub("_2","",x=X3$ORF)
@@ -183,8 +142,7 @@ if (file.exists(inputFile[3])){
  X <- cbind(X,X3)
 }  

-#Exp4
-if (file.exists(inputFile[4])){
+if (length(study_nums) > 3) {
  X4$ORF <- X4$OrfRep
  X4$ORF <- gsub("_1","",x=X4$ORF)
  X4$ORF <- gsub("_2","",x=X4$ORF)
@@ -212,8 +170,7 @@ if (file.exists(inputFile[4])){
  X <- cbind(X,X4)
 }

-#Exp5--------------------------------------------------------------
-if (file.exists(inputFile[5])){
+if (length(study_nums) > 4) {
  X5$ORF <- X5$OrfRep
  X5$ORF <- gsub("_1","",x=X5$ORF)
  X5$ORF <- gsub("_2","",x=X5$ORF)
@@ -240,12 +197,11 @@ if (file.exists(inputFile[5])){
  colnames(X5) <- paste(colnames(X5),"_X5",sep="")
  X <- cbind(X,X5)
 }
-print('after5-229')
-X$ORF <- X$OrfRep_X1
-part1=1

-if (file.exists(inputFile[1])& file.exists(inputFile[2])){
-  print('234 ifFile2')
+X$ORF <- X$OrfRep_X1
+
+if (length(study_nums) > 1) {
+
  X$ORF <- gsub("_1","",x=X$ORF)
  
  try(X[X$Gene_X1 == "",]$Gene_X1 <- X[X$Gene_X1 == "",]$OrfRep_X1)
@@ -263,7 +219,7 @@ if (file.exists(inputFile[1])& file.exists(inputFile[2])){
 }


-if (file.exists(inputFile[3])){
+if (length(study_nums) > 2) {
  X$ORF <- gsub("_1","",x=X$ORF)
  X$ORF <- gsub("_2","",x=X$ORF)
  
@@ -286,7 +242,7 @@ if (file.exists(inputFile[3])){
  colnames(X_heatmap)[2] <- "Gene"
 }

-if (file.exists(inputFile[4])){
+if (length(study_nums) > 3) {
  X$ORF <- gsub("_1","",x=X$ORF)
  X$ORF <- gsub("_2","",x=X$ORF)
  X$ORF <- gsub("_3","",x=X$ORF)
@@ -315,7 +271,7 @@ if (file.exists(inputFile[4])){
 }


-if (file.exists(inputFile[5])){
+if (length(study_nums) > 4) {
  X$ORF <- gsub("_1","",x=X$ORF)
  X$ORF <- gsub("_2","",x=X$ORF)
  X$ORF <- gsub("_3","",x=X$ORF)
@@ -350,8 +306,6 @@ if (file.exists(inputFile[5])){
 }


-#-----------------------------------------------------------------------------------------
-
 #theme elements for plots
 theme_Publication <- function(base_size=14, base_family="sans") {
  (theme_foundation(base_size=base_size, base_family=base_family)
@@ -434,27 +388,15 @@ scale_colour_Publication <- function(...){
 }


-
-#www.geneontology.org/ontology/gene_ontology_edit.obo file
-Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
+Ontology <- get_ontology(file=ontology_file,propagate_relationships = "is_a",extract_tags = "minimal")
 print(Ontology)
 #all ORFs associated with GO term
 GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
-print('429')
-
-
-#Gene_Association is the gene association to GO term file
-#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
-#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
-#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
-

 #Terms is the GO term list jwr moved up to TAABLES
-Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+Terms <- read.delim(file=sgd_terms_tfile,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))

-#-----------------------------------------------------------------------

-#-----------------------------------------------------------------------------------------------------
 #BIG LOOP BIG LOOP ------------------------------------------------------
 colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
 for(s in 1:dim(XX3)[1]){
@@ -465,24 +407,19 @@ for(s in 1:dim(XX3)[1]){
  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
  #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
  #only make plots if parent term has fewer than 500 children
-  
-  print('for(s in 1:dim(XX3)[1]){  -454')
 
-   Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  
  if(length(GOTerm_parent) > 100){
-    #print(length(GOTerm_parent))
    next()
  }
  
-  
  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
  if(Parent_Size < 2){
    next()
  }
  if(Parent_Size > 2000){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
-    print('pdf2000 469')
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -509,7 +446,7 @@ for(s in 1:dim(XX3)[1]){
  }
  
  if(Parent_Size >= 1000 && Parent_Size <= 2000){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -535,7 +472,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 500 && Parent_Size <= 1000){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -561,7 +498,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 200 && Parent_Size <= 500){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -587,7 +524,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 100 && Parent_Size <= 200){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -613,7 +550,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 60 && Parent_Size <= 100){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -639,7 +576,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 30 && Parent_Size <= 60){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -679,7 +616,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size >= 3 && Parent_Size <= 30){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -718,7 +655,7 @@ for(s in 1:dim(XX3)[1]){
    dev.off()
  }
  if(Parent_Size == 2){
-    pdf(file=paste(outputpath,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    pdf(file=paste(output_dir,XX3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
    for(i in 1:length(GOTerm_parent)){
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
@@ -753,13 +690,7 @@ for(s in 1:dim(XX3)[1]){
                      #ColSideColors=ev_repeat,
                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
      }
-      print('740')
    }
    dev.off()
  }
-  
-  
-  
-  
 }
-