Commit earlier refactoring

2024-07-29 11:44:45 -04:00
parent 29cbce0754
commit 527068e683
294 changed files with 5524008 additions and 0 deletions
--- a/workflow/.old/apps/r/SSscripts/18_0205_heatmaps_zscores_2SD_color_NARem_Z_lm.R
+++ b/workflow/.old/apps/r/SSscripts/18_0205_heatmaps_zscores_2SD_color_NARem_Z_lm.R
@@ -0,0 +1,296 @@
+timestamp()
+#version 10 - changed the way clusters are found, to fix cluster 1-0-1 picking up genes that belong to 1-0-11
+
+Args <- commandArgs(TRUE)
+if (length(Args) < 2) stop("usage: Rscript <this script> <finalTable.csv> <output directory>", call. = FALSE)
+#Arg 1: the "finalTable.csv" file produced by running REMc (generated by eclipse)
+input_finalTable <- Args[1]
+
+#Arg 2: directory that receives the heatmap PDFs; it is created when it does not exist
+subDir <- Args[2]
+
+#dir.exists() (rather than file.exists()) so that a regular file with the same name is not
+#mistaken for the output directory; recursive = TRUE lets nested paths be created in one go
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+outputpath <- subDir
+
+library(RColorBrewer)
+library(gplots)
+
+hmapfile <- data.frame(read.csv(file=input_finalTable,header=TRUE,sep=",",stringsAsFactors = FALSE))
+
+#treat the placeholder values -100, 100, 0.001 and -0.001 as missing data by setting them to NA
+hmapfile[hmapfile == -100] <- NA
+hmapfile[hmapfile == 100] <- NA
+hmapfile[hmapfile == 0.001] <- NA
+hmapfile[hmapfile == -0.001] <- NA
+
+
+#select the number of rows based on the number of genes
+num_total_genes <- length(hmapfile[,1])
+
+#break out the cluster names so each part of the cluster origin can be accessed
+#line below removed because it adds too many genes to clusters when going past 1-0-10, since grepl cannot differentiate between 1-0-1 and 1-0-10.
+#hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
+
+hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
+hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
+#use tail(x,n) for accessing the outward most cluster
+
+#clust_rounds: deepest clustering round in the table, i.e. the largest number
+#of ";"-separated cluster labels recorded for any one gene.
+#lengths() is vectorised and, unlike a 1:num_total_genes loop (which iterates
+#over c(1, 0) when the table has no rows), it is safe on an empty table;
+#the leading 0L keeps max() from returning -Inf in that case.
+clust_rounds <- max(0L, lengths(hmapfile$cluster.origin))
+
+unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
+unique_clusts <- unique_clusts[unique_clusts != " "]
+
+#select only the unique cluster names
+unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
+num_unique_clusts <- length(unique_clusts)
+
+#base the color key on a statistical analysis of the L and K data
+#need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
+#scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
+
+#hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
+
+#change so that the L data is multiplied to be on the same scale as the K data
+
+KEY_MIN <- 0
+KEY_MAX <- 0
+K_MIN <- 0
+L_MAX <- 0
+KcolumnValues <- vector()
+LcolumnValues <- vector()
+
+for(i in 4:(length(hmapfile[1,]) - 2)){
+  if(grepl("_Z_lm_K",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
+    KcolumnValues <- append(KcolumnValues,i)
+  }
+  if(grepl("_Z_lm_L",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
+    LcolumnValues <- append(LcolumnValues,i)
+  }
+}
+
+#L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
+#K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
+
+#L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
+#K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]
+
+#Z scores are
+L_MAX <- 12
+K_MIN <- -12
+
+#L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
+#hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
+
+#if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
+#  print("FOUND SHIFT VALUES")
+#  hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
+#}
+
+#KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
+#KEY_MIN <- as.numeric(K_MIN)
+
+KEY_MAX <- as.numeric(L_MAX)
+KEY_MIN <- as.numeric(K_MIN)
+
+print(KEY_MIN)
+print(L_MAX)
+#print(L_Multiplier)
+
+colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
+#print(colormapbreaks)
+
+#probably should give a way to detect shift in case it is not in the first row... (maybe just grepl for the whole column name?)
+#however since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
+#then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
+#could we pool all the shift data/average it?
+if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
+  even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=2)
+  #ev_repeat = rep("white",length(even_columns))
+  #ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
+  #middle_col <- (length(hmapfile[1,]) - 5)/2
+  #ev_repeat[(middle_col/2)] <- "black"
+  #print(ev_repeat)
+}
+
+if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == FALSE){
+  even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=1)
+  print("NO SHIFT VALS FOUND")
+}
+
+#FOR THIS SCRIPT ONLY (rap tem hu script)
+#even_columns <- c(2,5,7,10,12,15,17)
+
+#m <- 0
+colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1,]) - 2)])
+#print(colnames_edit)
+for(i in 1:length(colnames_edit)){
+  if(grepl("Shift",colnames_edit[i],fixed=TRUE) == TRUE){
+    colnames_edit[i] <- ""
+    colnames_edit[i+1] <- gsub(pattern = "_Z_lm_",replacement = " ",x = colnames_edit[i+1])
+    try(colnames_edit[i+1] <- gsub(pattern = "_",replacement = " ",x = colnames_edit[i+1]))
+    
+    # INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
+    # print(length(unlist(INT_store)))
+    # if(length(unlist(INT_store)) == 4){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 3){
+    # 
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 5){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 6){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
+    # }
+
+  }
+}
+print(colnames_edit)
+#break()
+#colnames_edit[5] <- "TEM HLEG K"
+#colnames_edit[10] <- "TEM HL K"
+#colnames_edit[15] <- "TEM HLEG L"
+#colnames_edit[20] <- "TEM HL L"
+
+
+#create the heatmaps
+for(i in 1:num_unique_clusts){
+  cluster <- unique_clusts[i]
+  cluster_data <- subset(hmapfile,grepl(cluster,cluster.origin))
+  cluster_length <- length(cluster_data[,1])
+  if(cluster_length != 1){
+    X0 <- as.matrix(cluster_data[,4:(length(hmapfile[1,]) - 2)])
+    if(cluster_length >= 2001){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=20,width=15)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor= "white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                #cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
+                keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster, 
+                #ColSideColors=ev_repeat,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      #abline(v=0.5467,col="black")
+      dev.off()
+    }
+    if(cluster_length >= 201 && cluster_length <= 2000){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=15,width=12)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
+                keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      #abline(v=0.5316,col="black")
+      dev.off()
+    }
+    if(cluster_length >= 150 && cluster_length <= 200){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=12,width=12)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.2, key=TRUE,
+                keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      dev.off()
+    }
+    if(cluster_length >= 101 && cluster_length <= 149){ #clusters of 101-149 genes: 12 x 12 inch page
+      mypath <- file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=12,width=12) #path given once; a second positional path would bind to pdf()'s 'onefile' argument
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
+                keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      dev.off()
+    }
+    if(cluster_length >= 60 && cluster_length <= 100){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=12,width=12)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
+                breaks=colormapbreaks,symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132),
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
+                keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      dev.off()
+    }
+    if(cluster_length <= 59 && cluster_length >= 30){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=9,width=12)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
+                keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      dev.off()
+    }
+    if(cluster_length <= 29){
+      mypath = file.path(outputpath,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
+      pdf(file=mypath,height=7,width=12)
+      heatmap.2(x=X0,
+                Rowv=TRUE, Colv=NA, 
+                distfun = dist, hclustfun = hclust,
+                dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
+                breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
+                #zlim=c(-132,132), 
+                xlab = "Type of Media", ylab = "Gene Name",
+                cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
+                keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
+                na.color="red", col=brewer.pal(11,"PuOr"),
+                main=cluster,
+                labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
+      dev.off()
+    }
+  }
+  #print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
+}
+
+
+timestamp()
+
--- a/workflow/.old/apps/r/SSscripts/22_0603_Remy_Exclude_DAmPs.R
+++ b/workflow/.old/apps/r/SSscripts/22_0603_Remy_Exclude_DAmPs.R
@@ -0,0 +1,39 @@
+# This version of Exclude_DAmPs was modified by Remy. It 
+# assumes that underscores included in OrfRep names are 
+# already in the DAmPList, because in some cases an OrfRep
+# may have _1 being a non-DAmP and _2 being a DAmP. It is
+# not a general rule so it is better just to use a list 
+# generated from the Master Plate sheet directly rather
+# than trying to deal with underscores in this script.
+
+#build in command args to apply this code to a given !!results sheet
+Args <- commandArgs(TRUE)
+
+#Arg 1 is the input file (ZScores_Interaction.csv, REMcReady.csv, other files with OrfRep col) for your genome wide YKO/YKD experiment
+input_file1 <- Args[1]
+
+#Arg 2 we need to supply the DAmPs list so we can remove these genes
+DAmPs_List <- Args[2]
+
+#Arg 3 is the output file
+output_file <- Args[3]
+
+X <- read.csv(file=input_file1,stringsAsFactors = FALSE)
+Damps <- read.delim(DAmPs_List,header=FALSE,stringsAsFactors = FALSE)
+stopifnot("OrfRep" %in% colnames(X)) #without an OrfRep column there is nothing to match on
+#create a helper column in X called ORF, holding the names that are looked up in the DAmP list (column V1)
+X$ORF <- X$OrfRep
+# Sean: remove _1-4 from newly created ORF column
+# Remy: following 4 lines are unnecessary
+# X$ORF <- gsub("_1","",x=X$ORF)
+# X$ORF <- gsub("_2","",x=X$ORF)
+# X$ORF <- gsub("_3","",x=X$ORF)
+# X$ORF <- gsub("_4","",x=X$ORF)
+
+X <- X[!(X$ORF %in% Damps$V1),]
+
+#remove the helper column by name instead of cutting off "the last column" by position,
+#which collapses X to a bare vector when the input file has a single column
+X$ORF <- NULL
+
+write.csv(X,file = output_file,row.names = FALSE)
--- a/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_Kvals_v2.R
+++ b/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_Kvals_v2.R
@@ -0,0 +1,374 @@
+library(ggplot2)
+library(plotly)
+library(htmlwidgets)
+library(extrafont)
+library(grid)
+library(ggthemes)
+
+#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
+#Arg1 is Average_GOTerms_All_1.csv
+#Arg2 is the name to give GTF results 1
+#Arg3 is Average_GOTerms_All2.csv
+#Arg4 is the name to give GTF results 2
+#Arg5 is the directory to put the files into 
+
+#build in command args to apply this code to a given !!results sheet
+Args <- commandArgs(TRUE)
+
+#Arg 1 is the GTF results 1
+input_file1 <- Args[1]
+
+#Arg 2 is the name of GTF results 1 to print in the results
+Name1 <- Args[2]
+
+#Arg 3 is GTF results 2
+input_file2 <- Args[3]
+
+#Arg 4 is the name of GTF results 2 to print in the results
+Name2 <- Args[4]
+
+#arg 5 is the directory to put the results into (and create that directory if needed)
+subDir <- Args[5]
+
+#create the output directory when it is missing (nested paths included)
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+#define the output path (fifth argument from Rscript). File names are later pasted
+#directly onto it, so make sure it ends in exactly one "/"
+outputpath <- sub("/*$", "/", subDir)
+
+
+
+
+#theme elements for plots
+#Publication-style theme with the legend laid out horizontally along the bottom.
+#base_size / base_family are handed straight to theme_foundation().
+theme_Publication <- function(base_size=14, base_family="sans") {
+  no_outline <- element_rect(colour = NA)  #reused for every rectangle drawn without a border
+  pale_grey <- "#f0f0f0"                    #major grid lines and facet strips
+  foundation <- theme_foundation(base_size=base_size, base_family=base_family)
+  overrides <- theme(
+    text = element_text(),
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    plot.background = no_outline,
+    plot.margin = unit(c(10,5,5,5),"mm"),
+    panel.background = no_outline,
+    panel.border = no_outline,
+    panel.grid.major = element_line(colour = pale_grey),
+    panel.grid.minor = element_blank(),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.text = element_text(), axis.ticks = element_line(),
+    axis.line = element_line(colour = "black"),
+    legend.key = no_outline, legend.title = element_text(face = "italic"),
+    legend.position = "bottom", legend.direction = "horizontal",
+    legend.key.size = unit(0.2, "cm"), legend.spacing = unit(0, "cm"),
+    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
+    strip.text = element_text(face = "bold")
+  )
+  foundation + overrides
+}
+
+#discrete fill scale using the nine-colour "Publication" palette; manual_pal is called as
+#scales::manual_pal so the function does not attach a package as a side effect
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#matching discrete colour scale; no longer depends on the fill scale having attached scales first
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+
+#Variant of the publication theme that places a vertical legend to the right of the plot.
+#base_size / base_family are handed straight to theme_foundation().
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  borderless <- element_rect(colour = NA)  #rectangle with no outline, used several times below
+  light_grey <- "#f0f0f0"                   #major grid lines and facet strips
+  styling <- theme(
+    text = element_text(),
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    plot.background = borderless,
+    plot.margin = unit(c(10,5,5,5),"mm"),
+    panel.background = borderless,
+    panel.border = borderless,
+    panel.grid.major = element_line(colour = light_grey),
+    panel.grid.minor = element_blank(),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.text = element_text(), axis.ticks = element_line(),
+    axis.line = element_line(colour = "black"),
+    legend.key = borderless, legend.title = element_text(face = "italic"),
+    legend.position = "right", legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"),
+    strip.background = element_rect(colour = light_grey, fill = light_grey),
+    strip.text = element_text(face = "bold")
+  )
+  #layer the overrides on top of the ggthemes foundation theme
+  theme_foundation(base_size=base_size, base_family=base_family) + styling
+}
+
+#discrete fill scale (nine-colour palette); scales::manual_pal is named explicitly because scales is not attached here
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#discrete colour scale built from the same nine-colour palette
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+X1 <- read.csv(file = input_file1,stringsAsFactors=FALSE,header = TRUE)
+
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(file = input_file2,stringsAsFactors=FALSE,header = TRUE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.html",sep="")
+
+
+X <- merge(X1,X2,by ="Term_Avg",all=TRUE,suffixes = c("_X1","_X2"))
+
+gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Ontology_Avg_X1,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOntology.pdf",sep=""),width = 12,height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+
+
+#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
+X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2),]
+
+X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2),]
+
+X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2),]
+
+X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2),]
+
+Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2),]
+
+Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2),]
+
+X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2),]
+
+X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2),]
+
+X$Overlap_Avg <- NA
+#label rows by indexing the column directly: a category with no matching terms is then a silent no-op, so no try() is needed. Later labels overwrite earlier ones.
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap.html",sep="")
+
+
+
+gg <- ggplot(data = X,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap.pdf",sep=""),width = 12,height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+
+x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2,]
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_byOverlap_above2genes.html",sep="")
+
+gg <- ggplot(data = x_rem2_gene,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_above2genes.pdf",sep=""),width = 12,height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_byOverlap.html",sep="")
+
+
+X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)),]
+
+gg <- ggplot(data = X_overlap_nothresold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap_Avg,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep = "")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_Above2SD_ByOverlap.pdf",sep=""),width = 12,height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+
+
+#only output GTF terms where average score is still above 2 after subtracting the SD
+#Z1 will ID aggravators, Z2 alleviators
+Z1 <- X
+Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
+Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
+
+Z2 <- X
+Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
+Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
+
+
+X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
+
+X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
+
+X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
+
+X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
+
+Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
+
+Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
+
+X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
+
+X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
+
+X$Overlap <- NA
+#label rows by indexing the column directly: a category with no matching terms is then a silent no-op, so no try() is needed. Later labels overwrite earlier ones.
+X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.html",sep="")
+
+
+X_abovethreshold <- X[!(is.na(X$Overlap)),]
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold.pdf",sep=""),width = 12,height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+X_abovethreshold$X1_Rank <- NA
+X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1,ties.method = "random")
+X_abovethreshold$X2_Rank <- NA
+X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2,ties.method = "random")
+  
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_K_Avg_X1,y=Z_lm_K_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_K_SD_X1,SD_2=Z_lm_K_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+write.csv(x=X,file = paste(outputpath,"All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
+write.csv(x=X_abovethreshold,file = paste(outputpath,"AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv",sep=""),row.names = FALSE)
--- a/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_Lvals_v2.R
+++ b/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_Lvals_v2.R
@@ -0,0 +1,374 @@
+library(ggplot2)
+library(plotly)
+library(htmlwidgets)
+library(extrafont)
+library(grid)
+library(ggthemes)
+
+#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
+#Arg1 is Average_GOTerms_All_1.csv
+#Arg2 is the name to give GTF results 1
+#Arg3 is Average_GOTerms_All2.csv
+#Arg4 is the name to give GTF results 2
+#Arg5 is the directory to put the files into 
+
+#build in command args to apply this code to a given !!results sheet
+# Positional command-line arguments (all five are required).
+Args <- commandArgs(TRUE)
+if (length(Args) < 5) {
+  stop("Expected 5 arguments: GTF results 1, name 1, GTF results 2, name 2, output directory", call. = FALSE)
+}
+
+# Arg 1 is the GTF results file for screen 1
+input_file1 <- Args[1]
+# Arg 2 is the name of GTF results 1 to print in the results
+Name1 <- Args[2]
+# Arg 3 is the GTF results file for screen 2
+input_file2 <- Args[3]
+# Arg 4 is the name of GTF results 2 to print in the results
+Name2 <- Args[4]
+# Arg 5 is the directory to put the results into; it is pasted directly in
+# front of each output file name, so it should end with "/".
+subDir <- Args[5]
+
+# Create the output directory (including missing parents) when it is absent.
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+# Output path prefix (fifth argument from Rscript)
+outputpath <- subDir
+
+
+
+
+#theme elements for plots
+# Publication-style theme: theme_foundation() plus the overrides below; legend horizontal at the bottom.
+theme_Publication <- function(base_size=14, base_family="sans") {
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = element_rect(colour = NA),
+    plot.background = element_rect(colour = NA),
+    panel.border = element_rect(colour = NA),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = "#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = element_rect(colour = NA),
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size = unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+    strip.text = element_text(face = "bold")
+  )
+  theme_foundation(base_size = base_size, base_family = base_family) + overrides
+}
+
+# Discrete fill scale built from the nine-colour "Publication" palette.
+scale_fill_Publication <- function(...){
+  # manual_pal is namespace-qualified so scales does not have to be attached from inside the function
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+# Discrete colour scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+
+# Publication-style theme: theme_foundation() plus the overrides below; legend vertical on the right.
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = element_rect(colour = NA),
+    plot.background = element_rect(colour = NA),
+    panel.border = element_rect(colour = NA),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = "#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = element_rect(colour = NA),
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+    strip.text = element_text(face = "bold")
+  )
+  theme_foundation(base_size = base_size, base_family = base_family) + overrides
+}
+
+# Discrete fill scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+# Discrete colour scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+# Load the two GTF average tables; text columns stay character (no factors).
+X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+# Outer-join the two result tables on GO term; columns present in both get _X1/_X2 suffixes.
+X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
+
+# Scatter of screen 1 vs screen 2 average Z per term, coloured by Ontology_Avg_X1.
+# The rectangle outlines the -2..2 square on both axes.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")
+gg <- ggplot(data = X, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
+# Partition GO terms by which screen(s) reach |average Z| >= 2. which() drops
+# terms whose comparison is NA (a score missing in either screen).
+X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
+X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
+X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
+X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
+Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
+Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
+X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
+X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
+
+# Label each term with its category. Assigning through a logical index is a
+# no-op for an empty category, so no try() is needed to swallow the
+# "replacement has 1 row, data has 0" error, and real errors stay visible.
+# Later assignments overwrite earlier ones, so the opposite-direction
+# categories at the end take precedence over the screen-specific labels.
+# NOTE(review): "Suppresors" (one s) is kept as-is because it is written to the output; confirm before correcting.
+X$Overlap_Avg <- NA
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+
+
+# Scatter of every merged term, coloured by the average-based Overlap_Avg
+# category (terms without a category carry NA as their colour value).
+# NOTE(review): the HTML name says "byOverlap" while the PDF says "ByOverlap";
+# both spellings are preserved.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html")
+gg <- ggplot(data = X, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+# Keep terms whose NumGenes_Avg is >= 2 in both screens. which() drops rows
+# where either count is NA instead of turning them into all-NA rows.
+x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2),]
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html")
+gg <- ggplot(data = x_rem2_gene, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+# Keep only terms that received an Overlap_Avg category (non-NA).
+X_overlap_nothresold <- X[!is.na(X$Overlap_Avg), ]
+
+# Scatter of those categorised terms, coloured by Overlap_Avg.
+# NOTE(review): HTML uses "byOverlap", PDF uses "ByOverlap"; both preserved.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_byOverlap.html")
+gg <- ggplot(data = X_overlap_nothresold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+#only output GTF terms where average score is still above 2 after subtracting the SD
+#Z1 will ID aggravators, Z2 alleviators
+# Z1 holds average minus SD (compared against +2 for aggravators); Z2 holds
+# average plus SD (compared against -2 for alleviators). Z2 keeps the column
+# name L_Subtract_SD_* even though its values are sums.
+Z1 <- X
+Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
+Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
+
+Z2 <- X
+Z2$L_Subtract_SD_X1 <- Z2$Z_lm_L_Avg_X1 + Z2$Z_lm_L_SD_X1
+Z2$L_Subtract_SD_X2 <- Z2$Z_lm_L_Avg_X2 + Z2$Z_lm_L_SD_X2
+
+# Eight categories based on the SD-adjusted scores. which() drops terms whose
+# comparison is NA. Z1 and Z2 have the same rows in the same order, so the
+# two mixed conditions at the end can combine columns from both.
+X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
+X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
+X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
+X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
+Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
+Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
+X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
+X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
+
+# Label each term. Indexed assignment is a no-op for an empty category, so the
+# try() wrappers (which also hid genuine errors) are not needed. Later
+# assignments overwrite earlier ones.
+# NOTE(review): "Suppresors" (one s) is kept as-is because it is written to the output; confirm before correcting.
+X$Overlap <- NA
+X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+
+
+# Keep only terms that received an SD-adjusted Overlap category (non-NA).
+X_abovethreshold <- X[!is.na(X$Overlap), ]
+
+# Scatter of the above-threshold terms, coloured by Overlap;
+# written as a PDF and as a self-contained plotly HTML widget.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html")
+gg <- ggplot(data = X_abovethreshold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+# Above-threshold scatter with each point labelled by its GO term name
+# (Term_Avg), drawn on a 20 x 20 PDF page; also saved as a self-contained
+# plotly HTML widget.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html")
+gg <- ggplot(data = X_abovethreshold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf"), width = 20, height = 20)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+# Rank terms by descending average Z in each screen (rank 1 = largest score);
+# ties are broken at random. rank() output is assigned directly (no NA
+# pre-fill) so a zero-row table does not raise a row-count error.
+X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
+X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
+
+# Above-threshold scatter with each point labelled by its X1_Rank value;
+# written once as a PDF and once as a self-contained plotly HTML widget.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html")
+gg <- ggplot(data = X_abovethreshold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf"), width = 15, height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+# Above-threshold scatter with each point labelled by its X2_Rank value;
+# written once as a PDF and once as a self-contained plotly HTML widget.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html")
+gg <- ggplot(data = X_abovethreshold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf"), width = 15, height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+# Export the full merged table and the above-threshold subset as CSV (no row names).
+write.csv(X, file = paste0(outputpath, "All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"), row.names = FALSE)
+write.csv(X_abovethreshold, file = paste0(outputpath, "AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv"), row.names = FALSE)
--- a/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_v2.R
+++ b/workflow/.old/apps/r/SSscripts/Compare_GTF_Averages_BetweenScreens_lm_v2.R
@@ -0,0 +1,374 @@
+library(ggplot2)
+library(plotly)
+library(htmlwidgets)
+library(extrafont)
+library(grid)
+library(ggthemes)
+
+#use this Rscript to compare two results sheets from GTF analysis "Average_GOTerms_All.csv"
+#Arg1 is Average_GOTerms_All_1.csv
+#Arg2 is the name to give GTF results 1
+#Arg3 is Average_GOTerms_All2.csv
+#Arg4 is the name to give GTF results 2
+#Arg5 is the directory to put the files into 
+
+#build in command args to apply this code to a given !!results sheet
+# Positional command-line arguments (all five are required).
+Args <- commandArgs(TRUE)
+if (length(Args) < 5) {
+  stop("Expected 5 arguments: GTF results 1, name 1, GTF results 2, name 2, output directory", call. = FALSE)
+}
+
+# Arg 1 is the GTF results file for screen 1
+input_file1 <- Args[1]
+# Arg 2 is the name of GTF results 1 to print in the results
+Name1 <- Args[2]
+# Arg 3 is the GTF results file for screen 2
+input_file2 <- Args[3]
+# Arg 4 is the name of GTF results 2 to print in the results
+Name2 <- Args[4]
+# Arg 5 is the directory to put the results into; it is pasted directly in
+# front of each output file name, so it should end with "/".
+subDir <- Args[5]
+
+# Create the output directory (including missing parents) when it is absent.
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+# Output path prefix (fifth argument from Rscript)
+outputpath <- subDir
+
+
+
+
+#theme elements for plots
+# Publication-style theme: theme_foundation() plus the overrides below; legend horizontal at the bottom.
+theme_Publication <- function(base_size=14, base_family="sans") {
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = element_rect(colour = NA),
+    plot.background = element_rect(colour = NA),
+    panel.border = element_rect(colour = NA),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = "#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = element_rect(colour = NA),
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size = unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+    strip.text = element_text(face = "bold")
+  )
+  theme_foundation(base_size = base_size, base_family = base_family) + overrides
+}
+
+# Discrete fill scale built from the nine-colour "Publication" palette.
+scale_fill_Publication <- function(...){
+  # manual_pal is namespace-qualified so scales does not have to be attached from inside the function
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+# Discrete colour scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+
+# Publication-style theme: theme_foundation() plus the overrides below; legend vertical on the right.
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = element_rect(colour = NA),
+    plot.background = element_rect(colour = NA),
+    panel.border = element_rect(colour = NA),
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = "#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = element_rect(colour = NA),
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+    strip.text = element_text(face = "bold")
+  )
+  theme_foundation(base_size = base_size, base_family = base_family) + overrides
+}
+
+# Discrete fill scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+# Discrete colour scale from the "Publication" palette; manual_pal is qualified so it resolves even when scales is not attached.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+# Load the two GTF average tables; text columns stay character (no factors).
+X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLEG_GenomeWide/ Average_GOTerms_All.csv",stringsAsFactors=FALSE,header=TRUE)
+
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+# Outer-join the two result tables on GO term; columns present in both get _X1/_X2 suffixes.
+X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
+
+# Scatter of screen 1 vs screen 2 average Z per term, coloured by Ontology_Avg_X1.
+# The rectangle outlines the -2..2 square on both axes.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.html")
+gg <- ggplot(data = X, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Ontology_Avg_X1, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+#ID aggravators and alleviators, regardless of whether they meet 2SD threshold
+# Partition GO terms by which screen(s) reach |average Z| >= 2. which() drops
+# terms whose comparison is NA (a score missing in either screen).
+X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2),]
+X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2),]
+X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2),]
+X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2),]
+Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2),]
+Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2),]
+X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2),]
+X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2),]
+
+# Label each term with its category. Assigning through a logical index is a
+# no-op for an empty category, so no try() is needed to swallow the
+# "replacement has 1 row, data has 0" error, and real errors stay visible.
+# Later assignments overwrite earlier ones, so the opposite-direction
+# categories at the end take precedence over the screen-specific labels.
+# NOTE(review): "Suppresors" (one s) is kept as-is because it is written to the output; confirm before correcting.
+X$Overlap_Avg <- NA
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Aggravators$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap_Avg[X$Term_Avg %in% Overlap_Alleviators$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+X$Overlap_Avg[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+
+
+# Scatter of every merged term, coloured by the average-based Overlap_Avg
+# category (terms without a category carry NA as their colour value).
+# NOTE(review): the HTML name says "byOverlap" while the PDF says "ByOverlap";
+# both spellings are preserved.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html")
+gg <- ggplot(data = X, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+# Keep terms whose NumGenes_Avg is >= 2 in both screens. which() drops rows
+# where either count is NA instead of turning them into all-NA rows.
+x_rem2_gene <- X[which(X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2),]
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html")
+gg <- ggplot(data = x_rem2_gene, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+# Keep only terms that received an Overlap_Avg category (non-NA).
+X_overlap_nothresold <- X[!is.na(X$Overlap_Avg), ]
+
+# Scatter of those categorised terms, coloured by Overlap_Avg.
+# NOTE(review): HTML uses "byOverlap", PDF uses "ByOverlap"; both preserved.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_byOverlap.html")
+gg <- ggplot(data = X_overlap_nothresold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap_Avg, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+
+
+#only output GTF terms where average score is still above 2 after subtracting the SD
+#Z1 will ID aggravators, Z2 alleviators
+# Z1 holds average minus SD (compared against +2 for aggravators); Z2 holds
+# average plus SD (compared against -2 for alleviators). Z2 keeps the column
+# name L_Subtract_SD_* even though its values are sums.
+Z1 <- X
+Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
+Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
+
+Z2 <- X
+Z2$L_Subtract_SD_X1 <- Z2$Z_lm_L_Avg_X1 + Z2$Z_lm_L_SD_X1
+Z2$L_Subtract_SD_X2 <- Z2$Z_lm_L_Avg_X2 + Z2$Z_lm_L_SD_X2
+
+# Eight categories based on the SD-adjusted scores. which() drops terms whose
+# comparison is NA. Z1 and Z2 have the same rows in the same order, so the
+# two mixed conditions at the end can combine columns from both.
+X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2),]
+X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2),]
+X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2),]
+X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2),]
+Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2),]
+Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2),]
+X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2),]
+X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2),]
+
+# Label each term. Indexed assignment is a no-op for an empty category, so the
+# try() wrappers (which also hid genuine errors) are not needed. Later
+# assignments overwrite earlier ones.
+# NOTE(review): "Suppresors" (one s) is kept as-is because it is written to the output; confirm before correcting.
+X$Overlap <- NA
+X$Overlap[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg] <- paste(Name1,"Specific_Deletion_Suppresors",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Enhancers",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg] <- paste(Name2,"Specific_Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg] <- "Overlapping_Deletion_Enhancers"
+X$Overlap[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg] <- "Overlapping_Deletion_Suppressors"
+X$Overlap[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg] <- paste(Name2,"Deletion_Enhancers",Name1,"Deletion_Suppressors",sep="_")
+X$Overlap[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg] <- paste(Name2,"Deletion_Suppressors",Name1,"Deletion_Enhancers",sep="_")
+
+
+# Keep only terms that received an SD-adjusted Overlap category (non-NA).
+X_abovethreshold <- X[!is.na(X$Overlap), ]
+
+# Scatter of the above-threshold terms, coloured by Overlap;
+# written as a PDF and as a self-contained plotly HTML widget.
+plotly_path <- paste0(getwd(), "/", outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html")
+gg <- ggplot(data = X_abovethreshold, aes(x = Z_lm_L_Avg_X1, y = Z_lm_L_Avg_X2, color = Overlap, Term = Term_Avg, Genes = Genes_Avg_X1, NumGenes = NumGenes_Avg_X1, AllPossibleGenes = AllPossibleGenes_Avg_X1, SD_1 = Z_lm_L_SD_X1, SD_2 = Z_lm_L_SD_X2)) +
+  xlab(paste0("GO Term Avg lm Z for ", Name1)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste0("GO Term Avg lm Z for ", Name2)) +
+  ggtitle(paste0("Comparing Average GO Term Z lm for ", Name1, " vs. ", Name2)) +
+  theme_Publication_legend_right()
+pdf(paste0(outputpath, "Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf"), width = 12, height = 8)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+saveWidget(pgg, plotly_path, selfcontained = TRUE)
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=Term_Avg),nudge_y = 0.25,size=2) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_names.pdf",sep=""),width = 20,height = 20)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+X_abovethreshold$X1_Rank <- NA
+X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1,ties.method = "random")
+X_abovethreshold$X2_Rank <- NA
+X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2,ties.method = "random")
+  
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=X1_Rank),nudge_y = 0.25,size=4) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX1.pdf",sep=""),width = 15,height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+
+plotly_path <- paste(getwd(),"/",outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.html",sep="")
+
+
+gg <- ggplot(data = X_abovethreshold,aes(x=Z_lm_L_Avg_X1,y=Z_lm_L_Avg_X2,color=Overlap,Term=Term_Avg,Genes=Genes_Avg_X1,NumGenes=NumGenes_Avg_X1,AllPossibleGenes=AllPossibleGenes_Avg_X1,SD_1=Z_lm_L_SD_X1,SD_2=Z_lm_L_SD_X2)) + 
+  xlab(paste("GO Term Avg lm Z for ",Name1,sep="")) +
+  geom_text(aes(label=X2_Rank),nudge_y = 0.25,size=4) + 
+  geom_rect(aes(xmin=-2,xmax=2,ymin=-2,ymax=2),color="grey20",size=0.25,alpha=0.1,inherit.aes = FALSE,fill=NA) + geom_point(shape=3,size=3) +
+  ylab(paste("GO Term Avg lm Z for ",Name2,sep="")) + ggtitle(paste("Comparing Average GO Term Z lm for ",Name1," vs. ",Name2,sep="")) +
+  theme_Publication_legend_right()
+pdf(paste(outputpath,"Scatter_lm_GTF_Averages_",Name1,"_vs_",Name2,"_All_ByOverlap_AboveThreshold_numberedX2.pdf",sep=""),width = 15,height = 15)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+saveWidget(pgg, plotly_path, 
+           selfcontained =TRUE)
+
+# Export the full term table and the above-threshold subset as CSV files under outputpath.
+write.csv(
+  x = X,
+  file = paste0(outputpath,"All_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv"),
+  row.names = FALSE
+)
+write.csv(
+  x = X_abovethreshold,
+  file = paste0(outputpath,"AboveThreshold_GTF_Avg_Scores_",Name1,"_vs_",Name2,".csv"),
+  row.names = FALSE
+)
--- a/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_3terms_V2.R
+++ b/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_3terms_V2.R
@@ -0,0 +1,739 @@
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+#library("plotly")
+#library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library("gplots")
+
+#build in command args to apply this code to a given set of three input tables
+Args <- commandArgs(TRUE)
+
+#all ten positional arguments are required; stop with a usage message instead of
+#letting missing arguments surface later as NA file paths
+if (length(Args) < 10) {
+  stop("Expected 10 arguments: input_file1 Name1 input_file2 Name2 input_file3 Name3 ontology_obo go_terms_tab GO_term_list_csv output_dir", call. = FALSE)
+}
+
+#Arg 1 is input table 1 (CSV; its OrfRep, Gene, Z_lm_* and Z_Shift_* columns are used below)
+input_file1 <- Args[1]
+
+#Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
+Name1 <- Args[2]
+
+#Arg 3 is input table 2 (same layout as Arg 1)
+input_file2 <- Args[3]
+
+#Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
+Name2 <- Args[4]
+
+#Arg 5 is input table 3 (same layout as Arg 1)
+input_file3 <- Args[5]
+
+#Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
+Name3 <- Args[6]
+
+
+#Arg 7 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+ontology_obo_input <- Args[7]
+
+#Arg 8 is the go_terms.tab file
+GOtermstab_file <- Args[8]
+
+#Arg 9 is the path of a CSV file listing the parent GO terms to process (it is
+#read with read.csv below): column 1 holds the numeric GO id, column 2 the term
+#name, which is used for the output PDF file name
+#example ids: chromatin organization is GO:0006325
+#for all processes use biological process GO:0008150
+#all functions use molecular function GO:0003674
+#all components use cellular component GO:0005575
+GO_ID_Arg <- Args[9]
+
+#arg 10 is the directory to put the results into (and create that directory if needed)
+subDir <- Args[10]
+
+#create the directory, including missing parent directories, when it is absent
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+#define the output path (tenth argument from Rscript); file names are appended
+#with paste(..., sep=""), so the path should end with a path separator
+outputpath <- subDir
+
+
+
+#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
+#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
+
+
+# if (file.exists(outputpath_X1_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X1_rank))
+# }
+# 
+# if (file.exists(outputpath_X2_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X2_rank))
+# }
+# 
+
+
+
+#theme elements for plots
+# Publication-style plot theme: theme_foundation() plus a fixed set of
+# overrides, with a compact horizontal legend below the plot.
+#   base_size   - base font size passed to theme_foundation()
+#   base_family - base font family passed to theme_foundation()
+# Returns the combined theme object.
+theme_Publication <- function(base_size=14, base_family="sans") {
+  foundation <- theme_foundation(base_size=base_size, base_family=base_family)
+
+  publication_overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = element_rect(colour = NA),
+    plot.background = element_rect(colour = NA),
+    panel.border = element_rect(colour = NA),
+    axis.title = element_text(face = "bold",size = rel(1)),
+    axis.title.y = element_text(angle=90,vjust =2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour="black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour="#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = element_rect(colour = NA),
+    # legend: horizontal strip underneath the panel
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size= unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face="italic"),
+    plot.margin=unit(c(10,5,5,5),"mm"),
+    strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
+    strip.text = element_text(face="bold")
+  )
+
+  foundation + publication_overrides
+}
+
+# Discrete fill scale using the fixed nine-colour "Publication" palette.
+#   ... - further arguments forwarded to discrete_scale()
+# manual_pal is called through the scales namespace rather than after
+# library(scales), so calling this function no longer attaches a package as a
+# side effect.
+scale_fill_Publication <- function(...){
+  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("fill","Publication",scales::manual_pal(values = publication_colours), ...)
+}
+
+# Discrete colour scale using the fixed nine-colour "Publication" palette.
+#   ... - further arguments forwarded to discrete_scale()
+# manual_pal is namespace-qualified so the function works even when the scales
+# package has not been attached with library().
+scale_colour_Publication <- function(...){
+  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("colour","Publication",scales::manual_pal(values = publication_colours), ...)
+}
+
+
+# Variant of theme_Publication() with a vertical legend on the right-hand side
+# and larger legend keys; every other theme setting is the same.
+#   base_size   - base font size passed on to theme_Publication()
+#   base_family - base font family passed on to theme_Publication()
+# Returns the combined theme object.
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  right_legend <- theme(
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size= unit(0.5, "cm")
+  )
+  theme_Publication(base_size=base_size, base_family=base_family) + right_legend
+}
+
+# Discrete fill scale using the fixed nine-colour "Publication" palette.
+# This redefinition replaces the scale_fill_Publication defined earlier in the file.
+#   ... - further arguments forwarded to discrete_scale()
+# manual_pal is namespace-qualified because nothing here attaches the scales package.
+scale_fill_Publication <- function(...){
+  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("fill","Publication",scales::manual_pal(values = publication_colours), ...)
+}
+
+# Discrete colour scale using the fixed nine-colour "Publication" palette.
+# This redefinition replaces the scale_colour_Publication defined earlier in the file.
+#   ... - further arguments forwarded to discrete_scale()
+# manual_pal is namespace-qualified because nothing here attaches the scales package.
+scale_colour_Publication <- function(...){
+  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("colour","Publication",scales::manual_pal(values = publication_colours), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+# Load the three input tables; text columns stay character (no factors).
+X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
+
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)
+
+
+X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
+
+#X5 <- read.csv(file = input_file4,stringsAsFactors=FALSE,header = TRUE)
+
+# X6 <- read.csv(file = input_file5,stringsAsFactors=FALSE,header = TRUE)
+
+
+
+# X3 is the list of parent GO terms to process. Despite its name, GO_ID_Arg is
+# used here as the path of a CSV file.
+X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
+
+# Column 1: turn the id into "GO:" followed by the value zero-padded to width 7.
+X3[[1]] <- paste0("GO:", formatC(X3[[1]], width = 7, flag = "0"))
+# Column 2: replace spaces and slashes with underscores (the value is later
+# pasted into the output PDF file name).
+X3[[2]] <- gsub(pattern = "[ /]", replacement = "_", x = X3[[2]])
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+
+#www.geneontology.org/ontology/gene_ontology_edit.obo file
+# Parse the GO .obo file, propagating is_a relationships and extracting only
+# the minimal tag set.
+Ontology <- get_ontology(
+  file = ontology_obo_input,
+  propagate_relationships = "is_a",
+  extract_tags = "minimal"
+)
+
+#all ORFs associated with GO term, as a list indexed by GO id
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+
+
+#Gene_Association is the gene association to GO term file
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+
+#Terms is the GO term list: a header-less, tab-delimited file read with quoting
+#disabled and given four explicit column names
+Terms <- read.delim(
+  file = GOtermstab_file,
+  header = FALSE,
+  quote = "",
+  col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")
+)
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+
+# ORF id = OrfRep with the tags "_1", "_2", "_3", "_4" deleted, one tag after
+# another (same result as four successive gsub() calls).
+X1$ORF <- Reduce(function(ids, tag) gsub(tag,"",x=ids), c("_1","_2","_3","_4"), X1$OrfRep)
+
+# Classify each row by its L z-score: NA -> "No Growth", >= 2 -> enhancer,
+# <= -2 -> suppressor, anything else keeps "No Effect".
+# The column is indexed directly (X1$col[rows] <- value) because the form
+# X1[rows,]$col <- value raises an error when no row matches.
+X1$Score_L <- "No Effect"
+X1$Score_L[is.na(X1$Z_lm_L)] <- "No Growth"
+X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2] <- "Deletion Enhancer"
+X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2] <- "Deletion Suppressor"
+
+# Same for K, with the labels reversed: >= 2 -> suppressor, <= -2 -> enhancer.
+X1$Score_K <- "No Effect"
+X1$Score_K[is.na(X1$Z_lm_K)] <- "No Growth"
+X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2] <- "Deletion Suppressor"
+X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+
+
+
+# ORF id = OrfRep with the tags "_1", "_2", "_3", "_4" deleted, one tag after
+# another (same result as four successive gsub() calls).
+X2$ORF <- Reduce(function(ids, tag) gsub(tag,"",x=ids), c("_1","_2","_3","_4"), X2$OrfRep)
+
+# Classify each row by its L z-score: NA -> "No Growth", >= 2 -> enhancer,
+# <= -2 -> suppressor, anything else keeps "No Effect".
+# The column is indexed directly (X2$col[rows] <- value) because the form
+# X2[rows,]$col <- value raises an error when no row matches.
+X2$Score_L <- "No Effect"
+X2$Score_L[is.na(X2$Z_lm_L)] <- "No Growth"
+X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2] <- "Deletion Enhancer"
+X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2] <- "Deletion Suppressor"
+
+# Same for K, with the labels reversed: >= 2 -> suppressor, <= -2 -> enhancer.
+X2$Score_K <- "No Effect"
+X2$Score_K[is.na(X2$Z_lm_K)] <- "No Growth"
+X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2] <- "Deletion Suppressor"
+X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+
+# ORF id = OrfRep with the tags "_1", "_2", "_3", "_4" deleted, one tag after
+# another (same result as four successive gsub() calls).
+X4$ORF <- Reduce(function(ids, tag) gsub(tag,"",x=ids), c("_1","_2","_3","_4"), X4$OrfRep)
+
+# Classify each row by its L z-score: NA -> "No Growth", >= 2 -> enhancer,
+# <= -2 -> suppressor, anything else keeps "No Effect".
+# The column is indexed directly (X4$col[rows] <- value) because the form
+# X4[rows,]$col <- value raises an error when no row matches.
+X4$Score_L <- "No Effect"
+X4$Score_L[is.na(X4$Z_lm_L)] <- "No Growth"
+X4$Score_L[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2] <- "Deletion Enhancer"
+X4$Score_L[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2] <- "Deletion Suppressor"
+
+# Same for K, with the labels reversed: >= 2 -> suppressor, <= -2 -> enhancer.
+X4$Score_K <- "No Effect"
+X4$Score_K[is.na(X4$Z_lm_K)] <- "No Growth"
+X4$Score_K[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2] <- "Deletion Suppressor"
+X4$Score_K[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2] <- "Deletion Enhancer"
+# 
+# X5$ORF <- X5$OrfRep
+# X5$ORF <- gsub("_1","",x=X5$ORF)
+# X5$ORF <- gsub("_2","",x=X5$ORF)
+# X5$ORF <- gsub("_3","",x=X5$ORF)
+# X5$ORF <- gsub("_4","",x=X5$ORF)
+# 
+# X5$Score_L <- "No Effect"
+# X5[is.na(X5$Z_lm_L),]$Score_L <- "No Growth"
+# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
+# X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
+# 
+# X5$Score_K <- "No Effect"
+# X5[is.na(X5$Z_lm_K),]$Score_K <- "No Growth"
+# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
+# X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
+
+
+# X6$ORF <- X6$OrfRep
+# X6$ORF <- gsub("_1","",x=X6$ORF)
+# X6$ORF <- gsub("_2","",x=X6$ORF)
+# X6$ORF <- gsub("_3","",x=X6$ORF)
+# X6$ORF <- gsub("_4","",x=X6$ORF)
+# 
+# X6$Score_L <- "No Effect"
+# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
+
+# X6$Score_K <- "No Effect"
+# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
+
+#express the na data as 0.001 in X1 for K and L
+#(the column is indexed directly so that a column without any NA is simply
+# left unchanged; X1[rows,]$col <- value raises an error when no row matches)
+X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
+X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
+
+#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+#express the na data as 0.001 in X2
+X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
+X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
+
+#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+
+#express the na data as 0.001 in X4
+X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
+X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X5
+# X5[is.na(X5$Z_lm_L),]$Z_lm_L <- 0.001
+# X5[is.na(X5$Z_lm_K),]$Z_lm_K <- 0.001
+
+
+#express the na data as 0.001 in X6
+# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
+# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
+
+
+
+# For each table: rank the rows by L and by K z-score (rank() defaults), sort
+# the rows by OrfRep in increasing order, then tag every column name with the
+# table's suffix so the tables can be bound side by side.
+
+# table 1
+X1$Rank_L <- rank(X1$Z_lm_L)
+X1$Rank_K <- rank(X1$Z_lm_K)
+X1 <- X1[order(X1$OrfRep),]
+colnames(X1) <- paste0(colnames(X1),"_X1")
+
+# table 2
+X2$Rank_L <- rank(X2$Z_lm_L)
+X2$Rank_K <- rank(X2$Z_lm_K)
+X2 <- X2[order(X2$OrfRep),]
+colnames(X2) <- paste0(colnames(X2),"_X2")
+
+# table 3 (suffix _X4)
+X4$Rank_L <- rank(X4$Z_lm_L)
+X4$Rank_K <- rank(X4$Z_lm_K)
+X4 <- X4[order(X4$OrfRep),]
+colnames(X4) <- paste0(colnames(X4),"_X4")
+
+# disabled handling for a fourth and fifth table
+# X5$Rank_L <- rank(X5$Z_lm_L)
+# X5$Rank_K <- rank(X5$Z_lm_K)
+# X6$Rank_L <- rank(X6$Z_lm_L)
+# X6$Rank_K <- rank(X6$Z_lm_K)
+#X5 <- X5[order(X5$OrfRep,decreasing = FALSE),]
+#X6 <- X6[order(X6$OrfRep,decreasing = FALSE),]
+#colnames(X5) <- paste(colnames(X5),"_X5",sep="")
+#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
+# colnames(X1)[1] <- "OrfRep"
+# colnames(X2)[1] <- "OrfRep"
+# colnames(X4)[1] <- "OrfRep"
+# colnames(X5)[1] <- "OrfRep"
+# colnames(X6)[1] <- "OrfRep"
+
+# cbind() pairs rows purely by position, so the three tables must list the same
+# OrfRep values in the same order; warn when they do not, because the combined
+# rows would then mix different strains.
+if (!(identical(X1$OrfRep_X1, X2$OrfRep_X2) && identical(X1$OrfRep_X1, X4$OrfRep_X4))) {
+  warning("OrfRep columns of the three input tables differ; cbind() pairs rows by position only", call. = FALSE)
+}
+X <- cbind(X1,X2,X4)
+#print(dim(X))
+#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
+#print(X[2700,])
+#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
+#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
+
+#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
+# Shared ORF id: table 1's OrfRep with the tags "_1".."_4" deleted one after another.
+X$ORF <- Reduce(function(ids, tag) gsub(tag,"",x=ids), c("_1","_2","_3","_4"), X$OrfRep_X1)
+
+# Where a gene name is blank, fall back to the OrfRep value of the same table.
+# NA gene names are excluded from the index, so one NA no longer makes the whole
+# assignment fail; a table without the two columns is skipped.
+for (tag in c("X1","X2","X4")) {
+  gene_col <- paste0("Gene_",tag)
+  orf_col <- paste0("OrfRep_",tag)
+  if (all(c(gene_col, orf_col) %in% colnames(X))) {
+    blank_gene <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
+    X[[gene_col]][blank_gene] <- X[[orf_col]][blank_gene]
+  }
+}
+#try(X[X$Gene_X5 == "",]$Gene_X5 <- X[X$Gene_X5 == "",]$OrfRep_X5)
+#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
+#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
+# 
+# #express the na data as 0.001
+# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
+# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
+# 
+# X$Overlap <- "No Effect"
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+# 
+# X$Overlap_K <- "No Effect"
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+
+# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
+# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
+# 
+# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
+# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
+
+# Keep the ORF and Gene_X1 identifier columns plus the K and L "Z_Shift" and
+# "Z_lm" columns. The X5 names belong to the original selection list; no X5
+# table is bound into X, so they are not expected to match any column.
+heatmap_columns <- c("ORF","Gene_X1",
+                     "Z_Shift_K_X1","Z_lm_K_X1",
+                     "Z_Shift_K_X2","Z_lm_K_X2",
+                     "Z_Shift_K_X4","Z_lm_K_X4",
+                     "Z_Shift_K_X5","Z_lm_K_X5",
+                     "Z_Shift_L_X1","Z_lm_L_X1",
+                     "Z_Shift_L_X2","Z_lm_L_X2",
+                     "Z_Shift_L_X4","Z_lm_L_X4",
+                     "Z_Shift_L_X5","Z_lm_L_X5")
+X_heatmap <- X[colnames(X) %in% heatmap_columns]
+#print(colnames(X_heatmap))
+#break()
+# Reorder by position; this needs exactly 14 selected columns, and which column
+# lands where depends on the column order of the input files.
+X_heatmap <- X_heatmap[,c(14,1,4,5,8,9,12,13,2,3,6,7,10,11)]
+
+# Swap the internal table tags in the column names for the user-supplied names,
+# in the order X1, X2, X4.
+column_tags <- list(X1 = Name1, X2 = Name2, X4 = Name3)
+for (tag in names(column_tags)) {
+  colnames(X_heatmap) <- gsub(pattern = tag,replacement = column_tags[[tag]],colnames(X_heatmap))
+}
+#colnames(X_heatmap) <- gsub(pattern = "X5",replacement = Name4,colnames(X_heatmap))
+#colnames(X_heatmap) <- gsub(pattern = "X6",replacement = Name5,colnames(X_heatmap))
+
+colnames(X_heatmap)[2] <- "Gene"
+
+
+# Twelve break points for the heatmap colour key; no break lies between -2 and 2.
+colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
+
+for(s in 1:dim(X3)[1]){
+  #Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
+  #Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
+  #GO_ID_Arg <- "GO:0006325"
+  GO_ID_Arg_loop <- as.character(X3[s,1])
+  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
+  #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
+  #only make plots if the parent term's descendant list has at most 100 entries
+  if(length(GOTerm_parent) > 100){
+    #print(length(GOTerm_parent))
+    next()
+  }
+  
+  
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  if(Parent_Size < 2){
+    next()
+  }
+  if(Parent_Size > 2000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+  if(Parent_Size >= 1000 && Parent_Size <= 2000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 500 && Parent_Size <= 1000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 200 && Parent_Size <= 500){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 100 && Parent_Size <= 200){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 60 && Parent_Size <= 100){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 30 && Parent_Size <= 60){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 3 && Parent_Size <= 30){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+  if(Parent_Size == 2){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+  
+  
+}
--- a/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_4terms_aging.R
+++ b/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_4terms_aging.R
@@ -0,0 +1,729 @@
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+#library("plotly")
+#library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library("gplots")
+
+#build in command args to apply this code to a given !!results sheet
+Args <- commandArgs(TRUE)
+
+#Arg 1 is the GTF results 1
+input_file1 <- Args[1]
+
+#Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
+Name1 <- Args[2]
+
+#Arg 3 is GTF results 3
+input_file2 <- Args[3]
+
+#Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
+Name2 <- Args[4]
+
+#Arg 5 is the GTF results 3
+input_file3 <- Args[5]
+
+#Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
+Name3 <- Args[6]
+
+#Arg 7 is GTF results 4
+input_file4 <- Args[7]
+
+#Arg 8 is the name of Interaction score file (Zscores_Interaction.csv) 4 to print in the results
+Name4 <- Args[8]
+
+
+#Arg 9 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+ontology_obo_input <- Args[9]
+
+#Arg 10 is the go_terms.tab file
+GOtermstab_file <- Args[10]
+
+#Arg 11 is the GO:ID - example: chromatin organization is GO:0006325
+#for all processes use biological process GO:0008150
+#all functions use molecular function GO:0003674
+#all components use cellular component GO:0005575
+GO_ID_Arg <- Args[11]
+
+#arg 12 is the directory to put the results into (and create that directory if needed)
+subDir <- Args[12]
+
+if (file.exists(subDir)){
+  outputpath <- subDir
+} else {
+  dir.create(file.path(subDir))
+}
+
+#define the output path (as fourth argument from Rscript)
+outputpath <- Args[12]
+
+
+
+#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
+#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
+
+
+# if (file.exists(outputpath_X1_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X1_rank))
+# }
+# 
+# if (file.exists(outputpath_X2_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X2_rank))
+# }
+# 
+
+
+
+#theme elements for plots
+#Publication-style ggplot2 theme with a horizontal legend underneath the plot.
+#  base_size, base_family: passed on to theme_foundation()
+#  returns: theme_foundation() with the overrides below added on top
+theme_Publication <- function(base_size=14, base_family="sans") {
+  foundation <- theme_foundation(base_size=base_size, base_family=base_family)
+
+  #shared building blocks
+  no_outline <- element_rect(colour = NA)
+  light_grey <- "#f0f0f0"
+
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline,
+    plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = light_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline,
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size = unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10,5,5,5), "mm"),
+    strip.background = element_rect(colour = light_grey, fill = light_grey),
+    strip.text = element_text(face = "bold")
+  )
+
+  foundation + overrides
+}
+
+#Discrete fill scale built from a fixed nine-colour palette.
+#  ...: forwarded unchanged to discrete_scale()
+scale_fill_Publication <- function(...){
+  #attaches scales so that manual_pal() can be found
+  library(scales)
+  publication_hex <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  publication_pal <- manual_pal(values = publication_hex)
+  discrete_scale("fill", "Publication", publication_pal, ...)
+}
+
+#Discrete colour scale built from a fixed nine-colour palette.
+#  ...: forwarded unchanged to discrete_scale()
+#manual_pal() is called through the scales namespace because this script never
+#attaches scales at top level; the unqualified call only worked after
+#scale_fill_Publication() had run library(scales).
+scale_colour_Publication <- function(...){
+  publication_hex <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_hex), ...)
+}
+
+
+#Publication-style ggplot2 theme with a vertical legend to the right of the plot.
+#  base_size, base_family: passed on to theme_foundation()
+#  returns: theme_foundation() with the overrides below added on top
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  foundation <- theme_foundation(base_size=base_size, base_family=base_family)
+
+  #shared building blocks
+  no_outline <- element_rect(colour = NA)
+  light_grey <- "#f0f0f0"
+
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline,
+    plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = light_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline,
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10,5,5,5), "mm"),
+    strip.background = element_rect(colour = light_grey, fill = light_grey),
+    strip.text = element_text(face = "bold")
+  )
+
+  foundation + overrides
+}
+
+#Discrete fill scale built from a fixed nine-colour palette.
+#  ...: forwarded unchanged to discrete_scale()
+#This definition replaces the earlier scale_fill_Publication() in this file.
+#manual_pal() is called through the scales namespace because this version does
+#not attach scales itself.
+scale_fill_Publication <- function(...){
+  publication_hex <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("fill", "Publication", scales::manual_pal(values = publication_hex), ...)
+}
+
+#Discrete colour scale built from a fixed nine-colour palette.
+#  ...: forwarded unchanged to discrete_scale()
+#This definition replaces the earlier scale_colour_Publication() in this file.
+#manual_pal() is called through the scales namespace because scales is not
+#attached at top level of this script.
+scale_colour_Publication <- function(...){
+  publication_hex <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("colour", "Publication", scales::manual_pal(values = publication_hex), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+#the four score tables; input_file3/input_file4 are stored as X4/X5 because X3
+#is used for the GO term list read further down
+X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
+
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)
+
+
+X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
+
+X5 <- read.csv(input_file4, header = TRUE, stringsAsFactors = FALSE)
+
+# X6 <- read.csv(file = input_file5,stringsAsFactors=FALSE,header = TRUE)
+
+
+
+#GO_ID_Arg is read as a csv file: column 1 is turned into a GO id string,
+#column 2 is cleaned up below
+X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
+
+#zero-pad column 1 to a width of 7 and prefix it with "GO:"
+X3[,1] <- paste0("GO:", formatC(X3[,1], width = 7, flag = "0"))
+#spaces and slashes in column 2 become underscores
+X3[,2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[,2])
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+
+#www.geneontology.org/ontology/gene_ontology_edit.obo file
+Ontology <- get_ontology(
+  file = ontology_obo_input,
+  propagate_relationships = "is_a",
+  extract_tags = "minimal"
+)
+
+#all ORFs associated with GO term
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+
+
+#Gene_Association is the gene association to GO term file
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+
+#Terms is the GO term list (headerless, tab-delimited, quoting disabled)
+go_terms_columns <- c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")
+Terms <- read.delim(file = GOtermstab_file, header = FALSE, quote = "", col.names = go_terms_columns)
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+
+#Adds the ORF, Score_L and Score_K columns to one score table.
+#  scores: data frame with OrfRep, Z_lm_L and Z_lm_K columns
+#  returns: the same data frame with the three columns appended in that order
+#Labels are written with element-wise assignment (column[mask] <- value).
+#The former df[mask,]$column <- value form stops with
+#"replacement has 1 row, data has 0" whenever no row matches the mask,
+#e.g. a table without any NA or without any |Z| >= 2.
+add_orf_and_scores <- function(scores) {
+  #ORF is OrfRep with the substrings _1, _2, _3 and _4 removed, in that order
+  scores$ORF <- scores$OrfRep
+  for (suffix in c("_1", "_2", "_3", "_4")) {
+    scores$ORF <- gsub(suffix, "", x = scores$ORF)
+  }
+
+  #L: NA -> "No Growth", Z >= 2 -> enhancer, Z <= -2 -> suppressor
+  l_missing <- is.na(scores$Z_lm_L)
+  scores$Score_L <- "No Effect"
+  scores$Score_L[l_missing] <- "No Growth"
+  scores$Score_L[!l_missing & scores$Z_lm_L >= 2] <- "Deletion Enhancer"
+  scores$Score_L[!l_missing & scores$Z_lm_L <= -2] <- "Deletion Suppressor"
+
+  #K: same thresholds, but the enhancer/suppressor labels are swapped relative to L
+  k_missing <- is.na(scores$Z_lm_K)
+  scores$Score_K <- "No Effect"
+  scores$Score_K[k_missing] <- "No Growth"
+  scores$Score_K[!k_missing & scores$Z_lm_K >= 2] <- "Deletion Suppressor"
+  scores$Score_K[!k_missing & scores$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+  scores
+}
+
+X1 <- add_orf_and_scores(X1)
+X2 <- add_orf_and_scores(X2)
+X4 <- add_orf_and_scores(X4)
+X5 <- add_orf_and_scores(X5)
+
+
+# X6$ORF <- X6$OrfRep
+# X6$ORF <- gsub("_1","",x=X6$ORF)
+# X6$ORF <- gsub("_2","",x=X6$ORF)
+# X6$ORF <- gsub("_3","",x=X6$ORF)
+# X6$ORF <- gsub("_4","",x=X6$ORF)
+# 
+# X6$Score_L <- "No Effect"
+# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
+
+# X6$Score_K <- "No Effect"
+# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
+
+#express the na data as 0.001 in X1 for K and L
+#element-wise assignment is used so that a column without any NA is left
+#untouched; the former X1[is.na(...),]$col <- 0.001 form stops with
+#"replacement has 1 row, data has 0" in that case
+X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
+X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
+
+#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+#express the na data as 0.001 in X2
+X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
+X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
+
+#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+
+#express the na data as 0.001 in X4
+X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
+X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X5
+X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
+X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X6
+# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
+# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
+
+
+
+#X1$Rank_L <- rank(X1$Z_lm_L)
+#X1$Rank_K <- rank(X1$Z_lm_K)
+
+#X2$Rank_L <- rank(X2$Z_lm_L)
+#X2$Rank_K <- rank(X2$Z_lm_K)
+
+#X4$Rank_L <- rank(X4$Z_lm_L)
+#X4$Rank_K <- rank(X4$Z_lm_K)
+
+#X5$Rank_L <- rank(X5$Z_lm_L)
+#X5$Rank_K <- rank(X5$Z_lm_K)
+
+
+# X6$Rank_L <- rank(X6$Z_lm_L)
+# X6$Rank_K <- rank(X6$Z_lm_K)
+
+#sort each table by OrfRep (ascending)
+X1 <- X1[order(X1$OrfRep),]
+X2 <- X2[order(X2$OrfRep),]
+X4 <- X4[order(X4$OrfRep),]
+X5 <- X5[order(X5$OrfRep),]
+#X6 <- X6[order(X6$OrfRep,decreasing = FALSE),]
+
+#append the dataset tag to every column name, then reset the name of the first
+#column to "OrfRep" so it can serve as the shared merge key
+colnames(X1) <- paste0(colnames(X1), "_X1")
+colnames(X1)[1] <- "OrfRep"
+colnames(X2) <- paste0(colnames(X2), "_X2")
+colnames(X2)[1] <- "OrfRep"
+colnames(X4) <- paste0(colnames(X4), "_X4")
+colnames(X4)[1] <- "OrfRep"
+colnames(X5) <- paste0(colnames(X5), "_X5")
+colnames(X5)[1] <- "OrfRep"
+#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
+# colnames(X6)[1] <- "OrfRep"
+
+#X <- cbind(X1,X2,X4,X5)
+#print(dim(X))
+#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
+#print(X[2700,])
+#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
+#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
+
+#full outer join of the four tables on OrfRep, folded left to right
+X <- Reduce(
+  function(merged, nxt) merge(merged, nxt, by = "OrfRep", all = TRUE),
+  list(X1, X2, X4, X5)
+)
+X$ORF <- X$OrfRep
+
+print(dim(X))
+print(colnames(X))
+
+#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
+#strip the substrings _1, _2, _3 and _4 (in that order) from the merged ORF column
+for (suffix in c("_1", "_2", "_3", "_4")) {
+  X$ORF <- gsub(suffix, "", x = X$ORF)
+}
+
+#remove new NAs: the all=TRUE merges leave NA in the columns of any dataset
+#that lacks a given OrfRep
+#element-wise assignment is used throughout; the former
+#X[mask,]$col <- value form stops with "replacement has 1 row, data has 0"
+#when no row matches, i.e. when a column has no NA after the merge
+for (dataset in c("X1", "X2", "X4", "X5")) {
+  #express the na data as 0.001 for L and K of this dataset
+  for (z_col in paste0(c("Z_lm_L_", "Z_lm_K_"), dataset)) {
+    X[[z_col]][is.na(X[[z_col]])] <- 0.001
+  }
+
+  #X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+  #X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+  #gene names that are NA or empty fall back to the OrfRep value;
+  #a missing Gene column is skipped (previously the error was swallowed by try())
+  gene_col <- paste0("Gene_", dataset)
+  if (gene_col %in% colnames(X)) {
+    unnamed <- is.na(X[[gene_col]]) | X[[gene_col]] == ""
+    X[[gene_col]][unnamed] <- X$OrfRep[unnamed]
+  }
+}
+#write.csv(X,paste(outputpath,"18_0614_all_3.csv",sep=""))
+#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
+#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
+# 
+# #express the na data as 0.001
+# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
+# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
+# 
+# X$Overlap <- "No Effect"
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+# 
+# X$Overlap_K <- "No Effect"
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+
+# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
+# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
+# 
+# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
+# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
+
+print(head(X))
+
+#dump the merged table and the four per-dataset tables
+#(file names are pasted directly onto outputpath)
+write.csv(X, paste0(outputpath, "18_0614_all_2.csv"))
+write.csv(X1, paste0(outputpath, "18_0614_all_X1.csv"))
+write.csv(X2, paste0(outputpath, "18_0614_all_X2.csv"))
+write.csv(X4, paste0(outputpath, "18_0614_all_X4.csv"))
+write.csv(X5, paste0(outputpath, "18_0614_all_X5.csv"))
+
+#keep only the columns needed for the heatmaps, in the order they occur in X
+heatmap_columns <- c(
+  "ORF", "Gene_X1",
+  "Z_Shift_K_X1", "Z_lm_K_X1",
+  "Z_Shift_K_X2", "Z_lm_K_X2",
+  "Z_Shift_K_X4", "Z_lm_K_X4",
+  "Z_Shift_K_X5", "Z_lm_K_X5",
+  "Z_Shift_L_X1", "Z_lm_L_X1",
+  "Z_Shift_L_X2", "Z_lm_L_X2",
+  "Z_Shift_L_X4", "Z_lm_L_X4",
+  "Z_Shift_L_X5", "Z_lm_L_X5"
+)
+X_heatmap <- X[colnames(X) %in% heatmap_columns]
+#print(colnames(X_heatmap))
+#break()
+
+print(colnames(X_heatmap))
+#positional reorder: requires all 18 columns listed above to be present
+#NOTE(review): the resulting order depends on the column order of the input files - confirm against the printed names
+heatmap_order <- c(18,1,4,5,8,9,12,13,16,17,2,3,6,7,10,11,14,15)
+X_heatmap <- X_heatmap[,heatmap_order]
+
+#swap the internal dataset tags for the names given on the command line
+colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
+colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
+colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name3, colnames(X_heatmap))
+colnames(X_heatmap) <- gsub(pattern = "X5", replacement = Name4, colnames(X_heatmap))
+#colnames(X_heatmap) <- gsub(pattern = "X6",replacement = Name5,colnames(X_heatmap))
+
+colnames(X_heatmap)[2] <- "Gene"
+
+
+#bin edges passed to heatmap.2(breaks = ...)
+colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
+write.csv(X_heatmap, paste0(outputpath, "18_0614_all.csv"))
+#break()
+
+for(s in 1:dim(X3)[1]){
+  #Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
+  #Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
+  #GO_ID_Arg <- "GO:0006325"
+  GO_ID_Arg_loop <- as.character(X3[s,1])
+  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
+  #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
+  #only make plots if get_descendants returned no more than 100 terms for the parent (larger parents are skipped)
+  if(length(GOTerm_parent) > 100){
+    #print(length(GOTerm_parent))
+    next()
+  }
+  
+  
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  if(Parent_Size < 2){
+    next()
+  }
+  if(Parent_Size > 2000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+  #Bracket for Parent_Size in (1000, 2000]: multi-page PDF with page height 35.
+  #The lower bound is exclusive so that a parent of exactly 1000 is rendered once, by the
+  #following bracket (whose upper bound is 1000). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 1000 && Parent_Size <= 2000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Bracket for Parent_Size in (500, 1000]: multi-page PDF with page height 30.
+  #The lower bound is exclusive so that a parent of exactly 500 is rendered once, by the
+  #following bracket (whose upper bound is 500). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 500 && Parent_Size <= 1000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Bracket for Parent_Size in (200, 500]: multi-page PDF with page height 25.
+  #The lower bound is exclusive so that a parent of exactly 200 is rendered once, by the
+  #following bracket (whose upper bound is 200). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 200 && Parent_Size <= 500){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Bracket for Parent_Size in (100, 200]: multi-page PDF with page height 20.
+  #The lower bound is exclusive so that a parent of exactly 100 is rendered once, by the
+  #following bracket (whose upper bound is 100). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 100 && Parent_Size <= 200){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Bracket for Parent_Size in (60, 100]: multi-page PDF with page height 15.
+  #The lower bound is exclusive so that a parent of exactly 60 is rendered once, by the
+  #following bracket (whose upper bound is 60). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 60 && Parent_Size <= 100){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Bracket for Parent_Size in (30, 60]: multi-page PDF with page height 10.
+  #The lower bound is exclusive so that a parent of exactly 30 is rendered once, by the
+  #following bracket (whose upper bound is 30). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 30 && Parent_Size <= 60){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #every column from the third onward goes into the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  #Smallest bracket, Parent_Size in [2, 30]: multi-page PDF with page height 7,
+  #one heatmap page per term in GOTerm_parent.
+  if(Parent_Size >= 2 && Parent_Size <= 30){
+    pdf(file = paste0(outputpath, X3[s,2], ".pdf"),
+        width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #numeric part of the GO id, used to find the term's name in Terms
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[,2])
+      GO_Term_Name <- as.character(Terms$GO_Term[Terms$GO_ID == GO_Term_Num])
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #columns 3..last form the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:ncol(Genes_Annotated_to_Term)])
+      #only plot terms with more than two matching rows
+      if(nrow(Genes_Annotated_to_Term) > 2){
+        try(heatmap.2(
+          x = X0,
+          main = GO_Term_Name,
+          ylab = "Gene",
+          labRow = as.character(Genes_Annotated_to_Term$Gene),
+          #rows are clustered, columns keep their given order
+          Rowv = TRUE, Colv = NA, dendrogram = "row",
+          distfun = dist, hclustfun = hclust,
+          scale = "none",
+          #colours
+          breaks = colormapbreaks, symbreaks = FALSE,
+          col = brewer.pal(11, "PuOr"), na.color = "red",
+          colsep = c(2,4,6), sepcolor = "white",
+          #text sizes and layout
+          cexCol = 0.7, cexRow = 0.7, offsetCol = 0.1,
+          cellnote = round(X0), notecex = 0.5,
+          key = TRUE, keysize = 0.5,
+          trace = "none", density.info = "none",
+          margins = c(10,8)
+          #ColSideColors=ev_repeat
+        ))
+      }
+    }
+    dev.off()
+  }
+  
+
+  
+  
+  
+}
--- a/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_4terms_v2.R
+++ b/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_4terms_v2.R
@@ -0,0 +1,720 @@
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+#library("plotly")
+#library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library("gplots")
+
+#Positional command-line arguments select all inputs and the output directory for this run
+Args <- commandArgs(TRUE)
+
+#Arg 1 is the GTF results 1 (CSV, read into X1 below; must have OrfRep, Z_lm_L and Z_lm_K columns)
+input_file1 <- Args[1]
+
+#Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
+#(it replaces "X1" in the heatmap column labels)
+Name1 <- Args[2]
+
+#Arg 3 is GTF results 2 (CSV, read into X2)
+input_file2 <- Args[3]
+
+#Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
+Name2 <- Args[4]
+
+#Arg 5 is the GTF results 3 (CSV, read into X4 - X3 is used for the GO term list)
+input_file3 <- Args[5]
+
+#Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
+Name3 <- Args[6]
+
+#Arg 7 is GTF results 4 (CSV, read into X5)
+input_file4 <- Args[7]
+
+#Arg 8 is the name of Interaction score file (Zscores_Interaction.csv) 4 to print in the results
+Name4 <- Args[8]
+
+
+#Arg 9 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+ontology_obo_input <- Args[9]
+
+#Arg 10 is the go_terms.tab file
+GOtermstab_file <- Args[10]
+
+#Arg 11 is a CSV file of parent GO terms (despite the variable name it is a file path, not a
+#single GO:ID): it is read with read.csv into X3, column 1 holding the numeric GO id
+#(zero-padded below to the GO:0006325 form) and column 2 the term name used as the PDF file name.
+#Example parent ids: chromatin organization is GO:0006325,
+#all processes - biological process GO:0008150,
+#all functions - molecular function GO:0003674,
+#all components - cellular component GO:0005575
+GO_ID_Arg <- Args[11]
+
+#arg 12 is the directory to put the results into (and create that directory if needed).
+#The value is used as a plain string prefix for the PDF names
+#(paste(outputpath, <term name>, ".pdf", sep="")), so it should end with a path separator.
+subDir <- Args[12]
+
+#dir.exists() tests specifically for a directory; recursive = TRUE also creates any
+#missing parent directories instead of failing with a warning
+if (!dir.exists(subDir)){
+  dir.create(subDir, recursive = TRUE)
+}
+
+#define the output path (twelfth argument from Rscript)
+outputpath <- subDir
+
+
+
+#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
+#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
+
+
+# if (file.exists(outputpath_X1_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X1_rank))
+# }
+# 
+# if (file.exists(outputpath_X2_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X2_rank))
+# }
+# 
+
+
+
+#theme elements for plots
+#Publication-style plot theme: theme_foundation() plus a bold centred title, black axis
+#lines, light-grey major grid lines and a horizontal legend placed under the plot.
+#  base_size, base_family: passed unchanged to theme_foundation()
+#  returns: the combined theme object
+theme_Publication <- function(base_size=14, base_family="sans") {
+  pale_grey <- "#f0f0f0"
+  no_outline <- element_rect(colour = NA)
+  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
+  adjustments <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline,
+    plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = pale_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline,
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size = unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
+    strip.text = element_text(face = "bold")
+  )
+  foundation + adjustments
+}
+
+#Discrete fill scale built on the nine-colour "Publication" palette.
+#  ...: forwarded to discrete_scale()
+#manual_pal() is called through the scales namespace instead of running library(scales)
+#inside the function, which attached a package as a side effect of building a plot.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+#Discrete colour scale built on the nine-colour "Publication" palette.
+#  ...: forwarded to discrete_scale()
+#manual_pal() belongs to the scales package, which is not among the packages attached
+#with library() at the top of this script, so it is namespace-qualified here.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+
+#Publication-style plot theme with a vertical legend on the right-hand side:
+#theme_foundation() plus a bold centred title, black axis lines and light-grey major grid.
+#  base_size, base_family: passed unchanged to theme_foundation()
+#  returns: the combined theme object
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  pale_grey <- "#f0f0f0"
+  no_outline <- element_rect(colour = NA)
+  foundation <- theme_foundation(base_size = base_size, base_family = base_family)
+  adjustments <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline,
+    plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = element_text(face = "bold", size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = pale_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline,
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = pale_grey, fill = pale_grey),
+    strip.text = element_text(face = "bold")
+  )
+  foundation + adjustments
+}
+
+#Discrete fill scale built on the nine-colour "Publication" palette. This assignment
+#replaces the function of the same name defined earlier in the script.
+#  ...: forwarded to discrete_scale()
+#manual_pal() belongs to the scales package, which is not among the packages attached
+#with library() at the top of this script, so it is namespace-qualified here.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+#Discrete colour scale built on the nine-colour "Publication" palette. This assignment
+#replaces the function of the same name defined earlier in the script.
+#  ...: forwarded to discrete_scale()
+#manual_pal() belongs to the scales package, which is not among the packages attached
+#with library() at the top of this script, so it is namespace-qualified here.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+#Score tables from arguments 1, 3, 5 and 7. They are held in X1, X2, X4 and X5;
+#the name X3 is used for the GO term list read further down.
+X1 <- read.csv(input_file1, header = TRUE, stringsAsFactors = FALSE)
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
+X2 <- read.csv(input_file2, header = TRUE, stringsAsFactors = FALSE)
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)
+X4 <- read.csv(input_file3, header = TRUE, stringsAsFactors = FALSE)
+X5 <- read.csv(input_file4, header = TRUE, stringsAsFactors = FALSE)
+# X6 <- read.csv(file = input_file5,stringsAsFactors=FALSE,header = TRUE)
+
+#Parent GO terms: column 1 = numeric GO id, column 2 = term name
+X3 <- read.csv(GO_ID_Arg, header = TRUE, stringsAsFactors = FALSE)
+
+#turn the numeric id into the zero-padded "GO:nnnnnnn" form
+X3[,1] <- paste0("GO:", formatC(X3[,1], width = 7, flag = "0"))
+#term names become file names later, so spaces and slashes are replaced by underscores
+X3[,2] <- gsub(pattern = "[ /]", replacement = "_", x = X3[,2])
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+
+#GO graph parsed from the gene_ontology_edit.obo file (www.geneontology.org/ontology/);
+#only "is_a" relationships are propagated and the minimal tag set is extracted
+Ontology <- get_ontology(
+  file = ontology_obo_input,
+  propagate_relationships = "is_a",
+  extract_tags = "minimal"
+)
+
+#list keyed by GO id; each element holds the ORFs associated with that term
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+
+
+#Gene_Association is the gene association to GO term file
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+
+#GO term list (go_terms.tab): headerless, tab-delimited, quoting disabled
+Terms <- read.delim(
+  file = GOtermstab_file,
+  header = FALSE,
+  quote = "",
+  col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition")
+)
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+
+#Append the ORF, Score_L and Score_K annotation columns to one score table.
+#  df: data frame with the columns OrfRep, Z_lm_L and Z_lm_K
+#  returns: df with ORF, Score_L and Score_K appended (in that order)
+#Labels are written with df$col[mask] <- value instead of df[mask,]$col <- value:
+#the latter stops with "replacement has 1 row, data has 0" whenever the mask selects
+#no rows (e.g. a table with no NA z-scores, or with no score beyond +/-2).
+add_interaction_scores <- function(df){
+  #ORF is OrfRep with the substrings _1, _2, _3 and _4 removed, one after the other
+  df$ORF <- df$OrfRep
+  for(tag in c("_1","_2","_3","_4")){
+    df$ORF <- gsub(tag,"",x=df$ORF)
+  }
+
+  #L: NA -> "No Growth", >= 2 -> enhancer, <= -2 -> suppressor, otherwise "No Effect"
+  has_L <- !is.na(df$Z_lm_L)
+  df$Score_L <- "No Effect"
+  df$Score_L[!has_L] <- "No Growth"
+  df$Score_L[has_L & df$Z_lm_L >= 2] <- "Deletion Enhancer"
+  df$Score_L[has_L & df$Z_lm_L <= -2] <- "Deletion Suppressor"
+
+  #K: same thresholds, but the enhancer/suppressor labels are swapped relative to L
+  has_K <- !is.na(df$Z_lm_K)
+  df$Score_K <- "No Effect"
+  df$Score_K[!has_K] <- "No Growth"
+  df$Score_K[has_K & df$Z_lm_K >= 2] <- "Deletion Suppressor"
+  df$Score_K[has_K & df$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+  df
+}
+
+X1 <- add_interaction_scores(X1)
+X2 <- add_interaction_scores(X2)
+X4 <- add_interaction_scores(X4)
+X5 <- add_interaction_scores(X5)
+
+
+# X6$ORF <- X6$OrfRep
+# X6$ORF <- gsub("_1","",x=X6$ORF)
+# X6$ORF <- gsub("_2","",x=X6$ORF)
+# X6$ORF <- gsub("_3","",x=X6$ORF)
+# X6$ORF <- gsub("_4","",x=X6$ORF)
+# 
+# X6$Score_L <- "No Effect"
+# X6[is.na(X6$Z_lm_L),]$Score_L <- "No Growth"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L >= 2,]$Score_L <- "Deletion Enhancer"
+# X6[!is.na(X6$Z_lm_L) & X6$Z_lm_L <= -2,]$Score_L <- "Deletion Suppressor"
+
+# X6$Score_K <- "No Effect"
+# X6[is.na(X6$Z_lm_K),]$Score_K <- "No Growth"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K >= 2,]$Score_K <- "Deletion Suppressor"
+# X6[!is.na(X6$Z_lm_K) & X6$Z_lm_K <= -2,]$Score_K <- "Deletion Enhancer"
+
+#express the na data as 0.001 in X1 for K and L
+#(the Score_* labels computed above already record which rows were NA).
+#The column is indexed directly: X1[is.na(X1$Z_lm_L),]$Z_lm_L <- 0.001 stops with
+#"replacement has 1 row, data has 0" when the column contains no NA at all.
+X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
+X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
+
+#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+#express the na data as 0.001 in X2
+X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
+X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
+
+#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+
+#express the na data as 0.001 in X4
+X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
+X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X5
+X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
+X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X6
+# X6[is.na(X6$Z_lm_L),]$Z_lm_L <- 0.001
+# X6[is.na(X6$Z_lm_K),]$Z_lm_K <- 0.001
+
+
+
+#Rank of every row's L and K z-score within its own table (rank() defaults)
+X1[["Rank_L"]] <- rank(X1[["Z_lm_L"]])
+X1[["Rank_K"]] <- rank(X1[["Z_lm_K"]])
+X2[["Rank_L"]] <- rank(X2[["Z_lm_L"]])
+X2[["Rank_K"]] <- rank(X2[["Z_lm_K"]])
+X4[["Rank_L"]] <- rank(X4[["Z_lm_L"]])
+X4[["Rank_K"]] <- rank(X4[["Z_lm_K"]])
+X5[["Rank_L"]] <- rank(X5[["Z_lm_L"]])
+X5[["Rank_K"]] <- rank(X5[["Z_lm_K"]])
+# X6$Rank_L <- rank(X6$Z_lm_L)
+# X6$Rank_K <- rank(X6$Z_lm_K)
+
+#Sort every table by OrfRep (ascending) so the rows line up when the tables are combined
+X1 <- X1[order(X1$OrfRep), ]
+X2 <- X2[order(X2$OrfRep), ]
+X4 <- X4[order(X4$OrfRep), ]
+X5 <- X5[order(X5$OrfRep), ]
+#X6 <- X6[order(X6$OrfRep,decreasing = FALSE),]
+
+#Tag every column name with its table of origin (_X1, _X2, _X4, _X5)
+names(X1) <- paste0(names(X1), "_X1")
+names(X2) <- paste0(names(X2), "_X2")
+names(X4) <- paste0(names(X4), "_X4")
+names(X5) <- paste0(names(X5), "_X5")
+#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
+#colnames(X6) <- paste(colnames(X6),"_X6",sep="")
+# colnames(X1)[1] <- "OrfRep"
+# colnames(X2)[1] <- "OrfRep"
+# colnames(X4)[1] <- "OrfRep"
+# colnames(X5)[1] <- "OrfRep"
+# colnames(X6)[1] <- "OrfRep"
+
+#Combine the four tables side by side. cbind() pairs rows purely by position, so this is
+#only meaningful when every table lists the same OrfRep values in the same (sorted) order.
+X <- cbind(X1,X2,X4,X5)
+#warn (rather than stop) when the rows do not line up, since the heatmaps would then
+#show scores of different ORFs on one row
+if(!(identical(X$OrfRep_X1, X$OrfRep_X2) &&
+     identical(X$OrfRep_X1, X$OrfRep_X4) &&
+     identical(X$OrfRep_X1, X$OrfRep_X5))){
+  warning("OrfRep differs between the input tables; rows of the combined table are misaligned")
+}
+#print(dim(X))
+#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
+#print(X[2700,])
+#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
+#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
+
+#ORF key for the GO lookups: OrfRep of table 1 with the substrings _1.._4 removed
+X$ORF <- X$OrfRep_X1
+#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
+for(tag in c("_1","_2","_3","_4")){
+  X$ORF <- gsub(tag,"",x=X$ORF)
+}
+
+#Rows without a gene name fall back to their OrfRep. The columns are indexed directly with
+#an NA-free mask: the former try(X[mask,]$Gene <- ...) raised (and hid) an error when no
+#name was blank, and skipped the replacement entirely when any gene name was NA.
+for(tag in c("X1","X2","X4","X5")){
+  gene_col <- paste0("Gene_", tag)
+  orf_col <- paste0("OrfRep_", tag)
+  blank <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
+  X[[gene_col]][blank] <- X[[orf_col]][blank]
+}
+#try(X[X$Gene_X6 == "",]$Gene_X6 <- X[X$Gene_X6 == "",]$OrfRep_X6)
+#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
+# 
+# #express the na data as 0.001
+# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
+# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
+# 
+# X$Overlap <- "No Effect"
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+# 
+# X$Overlap_K <- "No Effect"
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+
+# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
+# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
+# 
+# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
+# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
+
+#Columns kept for the heatmaps: ORF, the gene name of table 1, and the K and L
+#Z_Shift / Z_lm columns of all four tables. The selection keeps X's column order.
+heatmap_cols <- c("ORF", "Gene_X1",
+                  paste0(rep(c("Z_Shift_K_","Z_lm_K_","Z_Shift_L_","Z_lm_L_"), times = 4),
+                         rep(c("X1","X2","X4","X5"), each = 4)))
+X_heatmap <- X[colnames(X) %in% heatmap_cols]
+#print(colnames(X_heatmap))
+#break()
+
+#Positional reorder of the 18 selected columns: column 18 first, then column 1, then
+#columns 4-5, 8-9, 12-13, 16-17, and finally columns 2-3, 6-7, 10-11, 14-15.
+#NOTE(review): this depends on the column order of the input CSVs - confirm it still holds.
+X_heatmap <- X_heatmap[,c(18,1,
+                          4,5,8,9,12,13,16,17,
+                          2,3,6,7,10,11,14,15)]
+
+#Swap the internal table tags for the display names given on the command line
+display_names <- c(X1 = Name1, X2 = Name2, X4 = Name3, X5 = Name4)
+for(tag in names(display_names)){
+  colnames(X_heatmap) <- gsub(pattern = tag, replacement = display_names[[tag]], colnames(X_heatmap))
+}
+#colnames(X_heatmap) <- gsub(pattern = "X6",replacement = Name5,colnames(X_heatmap))
+
+#second column is addressed as $Gene when labelling heatmap rows
+colnames(X_heatmap)[2] <- "Gene"
+
+
+#12 break points (11 colour bins): steps of 2 from -12 to 12, with one bin spanning -2..2
+colormapbreaks <- c(seq(-12, -2, by = 2), seq(2, 12, by = 2))
+
+for(s in 1:dim(X3)[1]){
+  #Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
+  #Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
+  #GO_ID_Arg <- "GO:0006325"
+  GO_ID_Arg_loop <- as.character(X3[s,1])
+  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
+  #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
+  #only make plots if the parent term has at most 100 descendant terms (as returned by get_descendants)
+  if(length(GOTerm_parent) > 100){
+    #print(length(GOTerm_parent))
+    next()
+  }
+  
+  
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  if(Parent_Size < 2){
+    next()
+  }
+  #Largest bracket, Parent_Size above 2000: multi-page PDF with page height 45,
+  #one heatmap page per term in GOTerm_parent.
+  if(Parent_Size > 2000){
+    pdf(file = paste0(outputpath, X3[s,2], ".pdf"),
+        width = 12, height = 45, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #numeric part of the GO id, used to find the term's name in Terms
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[,2])
+      GO_Term_Name <- as.character(Terms$GO_Term[Terms$GO_ID == GO_Term_Num])
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #columns 3..last (the score columns after ORF and Gene) form the plotted matrix
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:ncol(Genes_Annotated_to_Term)])
+      #only plot terms with more than two matching rows
+      if(nrow(Genes_Annotated_to_Term) > 2){
+        try(heatmap.2(
+          x = X0,
+          main = GO_Term_Name,
+          ylab = "Gene",
+          labRow = as.character(Genes_Annotated_to_Term$Gene),
+          #rows are clustered, columns keep their given order
+          Rowv = TRUE, Colv = NA, dendrogram = "row",
+          distfun = dist, hclustfun = hclust,
+          scale = "none",
+          #colours
+          breaks = colormapbreaks, symbreaks = FALSE,
+          col = brewer.pal(11, "PuOr"), na.color = "red",
+          colsep = c(2,4,6), sepcolor = "white",
+          #text sizes and layout
+          cexCol = 0.7, cexRow = 0.5, offsetCol = 0.1,
+          cellnote = round(X0), notecex = 0.5,
+          key = TRUE, keysize = 0.5,
+          trace = "none", density.info = "none",
+          margins = c(10,8)
+          #ColSideColors=ev_repeat
+        ))
+      }
+    }
+    dev.off()
+  }
+  
+  #Bracket for Parent_Size in (1000, 2000]: multi-page PDF with page height 35.
+  #The lower bound is exclusive so that a parent of exactly 1000 is rendered once, by the
+  #following bracket (whose upper bound is 1000). With ">=" both brackets matched and the
+  #PDF written here was immediately overwritten by the next one.
+  if(Parent_Size > 1000 && Parent_Size <= 2000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
+    #one heatmap page per term in GOTerm_parent; seq_along() is safe for an empty vector
+    for(i in seq_along(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      #integer part of the "GO:nnnnnnn" id, matched against Terms$GO_ID to get the page title
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      #rows of X_heatmap whose ORF is annotated to this term
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      #columns 1-2 are ORF and Gene; every column from the third onward is plotted
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #terms with two or fewer matching rows are skipped
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        #try() keeps one failing heatmap from aborting the remaining pages
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 500 && Parent_Size <= 1000){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 200 && Parent_Size <= 500){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 100 && Parent_Size <= 200){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 60 && Parent_Size <= 100){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 30 && Parent_Size <= 60){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 3 && Parent_Size <= 30){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      if(dim(Genes_Annotated_to_Term)[1] > 2){
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size == 2){
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]])
+      #print(X0)
+      if(dim(Genes_Annotated_to_Term)[1] > 0){
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+
+  
+  
+  
+}
--- a/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_5terms_V2.R
+++ b/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_5terms_V2.R
@@ -0,0 +1,754 @@
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+#library("plotly")
+#library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library(gplots)
+
+# Command-line arguments (14 are required, in this order):
+#   1, 3, 5, 7, 9  - the five interaction score result files (CSV; the OrfRep, Gene,
+#                    Z_lm_L and Z_lm_K columns are read further down)
+#   2, 4, 6, 8, 10 - the label for each of those files; it replaces the X1/X2/X4/X5/X6
+#                    tag in the heatmap column names
+#   11 - the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+#   12 - the go_terms.tab file
+#   13 - a CSV file listing the parent GO terms to plot: column 1 is the numeric part of
+#        the GO ID (zero-padded to "GO:nnnnnnn" below), column 2 is the term name that
+#        becomes the PDF file name.
+#        example IDs: chromatin organization GO:0006325, biological process GO:0008150,
+#        molecular function GO:0003674, cellular component GO:0005575
+#   14 - the directory to put the results into (created if needed)
+Args <- commandArgs(TRUE)
+
+# Fail early with a clear message instead of reading NA paths further down
+if (length(Args) < 14) {
+  stop("Expected 14 command-line arguments but received ", length(Args), call. = FALSE)
+}
+
+#Arg 1 is the GTF results 1
+input_file1 <- Args[1]
+
+#Arg 2 is the name of Interaction score file (Zscores_Interaction.csv) 1 to print in the results
+Name1 <- Args[2]
+
+#Arg 3 is GTF results 2
+input_file2 <- Args[3]
+
+#Arg 4 is the name of Interaction score file (Zscores_Interaction.csv) 2 to print in the results
+Name2 <- Args[4]
+
+#Arg 5 is the GTF results 3
+input_file3 <- Args[5]
+
+#Arg 6 is the name of Interaction score file (Zscores_Interaction.csv) 3 to print in the results
+Name3 <- Args[6]
+
+#Arg 7 is GTF results 4
+input_file4 <- Args[7]
+
+#Arg 8 is the name of Interaction score file (Zscores_Interaction.csv) 4 to print in the results
+Name4 <- Args[8]
+
+#Arg 9 is GTF results 5
+input_file5 <- Args[9]
+
+#Arg 10 is the name of Interaction score file (Zscores_Interaction.csv) 5 to print in the results
+Name5 <- Args[10]
+
+#Arg 11 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+ontology_obo_input <- Args[11]
+
+#Arg 12 is the go_terms.tab file
+GOtermstab_file <- Args[12]
+
+#Arg 13 is the CSV file of parent GO terms (numeric GO ID, term name) - it is read with read.csv below
+GO_ID_Arg <- Args[13]
+
+#Arg 14 is the directory to put the results into (and create that directory if needed)
+subDir <- Args[14]
+
+# recursive = TRUE so that a nested output path can be created in one go
+if (!file.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+#define the output path (the 14th argument from Rscript)
+# NOTE: the PDF names are built with paste(outputpath, name, ".pdf", sep=""), so the
+# argument has to end with a path separator for the files to land inside the directory
+outputpath <- subDir
+
+
+
+#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
+#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
+
+
+# if (file.exists(outputpath_X1_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X1_rank))
+# }
+# 
+# if (file.exists(outputpath_X2_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X2_rank))
+# }
+# 
+
+
+
+#theme elements for plots
+# ggplot2 theme for publication figures: theme_foundation() plus bold centred titles,
+# black axis lines, light grey major grid lines and a compact horizontal legend at the bottom.
+# base_size and base_family are handed straight to theme_foundation().
+theme_Publication <- function(base_size=14, base_family="sans") {
+  pale_grey <- "#f0f0f0"
+  no_outline <- element_rect(colour = NA)
+  foundation <- theme_foundation(base_size=base_size, base_family=base_family)
+  overrides <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline,
+    plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = element_text(face = "bold",size = rel(1)),
+    axis.title.y = element_text(angle=90,vjust =2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour="black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour=pale_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline,
+    legend.position = "bottom",
+    legend.direction = "horizontal",
+    legend.key.size= unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face="italic"),
+    plot.margin=unit(c(10,5,5,5),"mm"),
+    strip.background=element_rect(colour=pale_grey,fill=pale_grey),
+    strip.text = element_text(face="bold")
+  )
+  foundation + overrides
+}
+
+# Discrete fill scale built on the fixed nine-colour "Publication" palette.
+# Any arguments are forwarded unchanged to discrete_scale().
+# NOTE: a later definition of the same name in this file overrides this one.
+scale_fill_Publication <- function(...){
+  # manual_pal is namespace-qualified so the function works whether or not the scales
+  # package is attached, and calling it does not attach a package as a side effect
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+  
+}
+
+# Discrete colour scale built on the fixed nine-colour "Publication" palette.
+# Any arguments are forwarded unchanged to discrete_scale().
+# NOTE: a later definition of the same name in this file overrides this one.
+scale_colour_Publication <- function(...){
+  # manual_pal is namespace-qualified so the function does not depend on the scales
+  # package having been attached beforehand
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+  
+}
+
+
+# Same publication theme as theme_Publication() except for the legend, which is drawn
+# vertically on the right-hand side with 0.5 cm keys.
+# base_size and base_family are handed straight to theme_foundation().
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  strip_grey <- "#f0f0f0"
+  bold_text <- element_text(face="bold")
+  borderless <- element_rect(colour = NA)
+
+  legend_right <- theme(
+    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = borderless,
+    plot.background = borderless,
+    panel.border = borderless,
+    axis.title = element_text(face = "bold",size = rel(1)),
+    axis.title.y = element_text(angle=90,vjust =2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour="black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour=strip_grey),
+    panel.grid.minor = element_blank(),
+    legend.key = borderless,
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size= unit(0.5, "cm"),
+    legend.spacing = unit(0, "cm"),
+    legend.title = element_text(face="italic"),
+    plot.margin=unit(c(10,5,5,5),"mm"),
+    strip.background=element_rect(colour=strip_grey,fill=strip_grey),
+    strip.text = bold_text
+  )
+
+  theme_foundation(base_size=base_size, base_family=base_family) + legend_right
+}
+
+# Discrete fill scale built on the fixed nine-colour "Publication" palette.
+# Any arguments are forwarded unchanged to discrete_scale().
+# This definition replaces the earlier one of the same name when the script runs top to
+# bottom, so it must not rely on that earlier version having attached the scales package.
+scale_fill_Publication <- function(...){
+  # scales is never attached at the top of this script: qualify manual_pal explicitly
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+  
+}
+
+# Discrete colour scale built on the fixed nine-colour "Publication" palette.
+# Any arguments are forwarded unchanged to discrete_scale().
+# This definition replaces the earlier one of the same name when the script runs top to bottom.
+scale_colour_Publication <- function(...){
+  # scales is never attached at the top of this script: qualify manual_pal explicitly
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+  
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+# Reader shared by every CSV input: first row is the header, text columns stay character
+read_input_csv <- function(csv_path) {
+  read.csv(file = csv_path, header = TRUE, stringsAsFactors = FALSE)
+}
+
+# The five interaction score tables. The numbering skips X3 because X3 holds the
+# list of parent GO terms.
+X1 <- read_input_csv(input_file1)
+X2 <- read_input_csv(input_file2)
+X4 <- read_input_csv(input_file3)
+X5 <- read_input_csv(input_file4)
+X6 <- read_input_csv(input_file5)
+
+# Parent GO terms to plot: column 1 = GO number, column 2 = term name
+X3 <- read_input_csv(GO_ID_Arg)
+
+# Turn the GO number into the zero-padded "GO:nnnnnnn" form
+X3[,1] <- paste0("GO:",formatC(X3[,1],width=7,flag="0"))
+# The term name becomes a file name later on, so spaces and slashes are swapped for "_"
+X3[,2] <- gsub(pattern = "[ /]",replacement = "_",x = X3[,2])
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+
+# Load the three GO reference resources used by the plotting loop further down.
+
+#www.geneontology.org/ontology/gene_ontology_edit.obo file
+# Only "is_a" relationships are propagated; the loop below passes this object to
+# get_descendants() to list the terms under each parent term.
+Ontology <- get_ontology(file=ontology_obo_input,propagate_relationships = "is_a",extract_tags = "minimal")
+
+#all ORFs associated with GO term
+# Converted to a plain list that is indexed by "GO:nnnnnnn" strings further down.
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+
+
+#Gene_Association is the gene association to GO term file
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+
+#Terms is the GO term list
+# Read as a headerless tab-delimited file with quoting disabled; the four column names
+# are assigned here. The plotting loop looks up Terms$GO_Term by comparing Terms$GO_ID
+# with the integer part of a GO ID, so GO_ID is presumably numeric in the file - TODO confirm.
+Terms <- read.delim(file=GOtermstab_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+
+# Adds three derived columns to one interaction score table and returns the table:
+#   ORF     - OrfRep with every "_1", "_2", "_3" and "_4" substring removed
+#   Score_L - call from Z_lm_L: NA -> "No Growth", >= 2 -> "Deletion Enhancer",
+#             <= -2 -> "Deletion Suppressor", anything else "No Effect"
+#   Score_K - call from Z_lm_K with the same cut-offs but the Enhancer/Suppressor
+#             labels swapped (>= 2 -> "Deletion Suppressor", <= -2 -> "Deletion Enhancer")
+# The labels are written with plain vector indexing, so a category that matches no
+# row is simply a no-op and nothing has to be wrapped in try().
+annotate_interaction_scores <- function(scores) {
+  scores$ORF <- scores$OrfRep
+  # removed one after the other, in this order, exactly like the original gsub chain
+  for (rep_tag in c("_1","_2","_3","_4")) {
+    scores$ORF <- gsub(rep_tag,"",x=scores$ORF)
+  }
+
+  z_l <- scores$Z_lm_L
+  scores$Score_L <- "No Effect"
+  scores$Score_L[is.na(z_l)] <- "No Growth"
+  scores$Score_L[!is.na(z_l) & z_l >= 2] <- "Deletion Enhancer"
+  scores$Score_L[!is.na(z_l) & z_l <= -2] <- "Deletion Suppressor"
+
+  z_k <- scores$Z_lm_K
+  scores$Score_K <- "No Effect"
+  scores$Score_K[is.na(z_k)] <- "No Growth"
+  scores$Score_K[!is.na(z_k) & z_k >= 2] <- "Deletion Suppressor"
+  scores$Score_K[!is.na(z_k) & z_k <= -2] <- "Deletion Enhancer"
+
+  scores
+}
+
+X1 <- annotate_interaction_scores(X1)
+X2 <- annotate_interaction_scores(X2)
+X4 <- annotate_interaction_scores(X4)
+X5 <- annotate_interaction_scores(X5)
+X6 <- annotate_interaction_scores(X6)
+
+# Express the NA z-scores as the sentinel 0.001 in every dataset, for both L and K.
+# The column vector is indexed directly: the row-subset form
+# X[is.na(X$col),]$col <- value stops the script with "replacement has 1 row,
+# data has 0" whenever a column contains no NA at all.
+
+#express the na data as 0.001 in X1 for K and L
+X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
+X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
+
+#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+#express the na data as 0.001 in X2
+X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
+X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
+
+#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+
+#express the na data as 0.001 in X4
+X4$Z_lm_L[is.na(X4$Z_lm_L)] <- 0.001
+X4$Z_lm_K[is.na(X4$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X5
+X5$Z_lm_L[is.na(X5$Z_lm_L)] <- 0.001
+X5$Z_lm_K[is.na(X5$Z_lm_K)] <- 0.001
+
+
+#express the na data as 0.001 in X6
+X6$Z_lm_L[is.na(X6$Z_lm_L)] <- 0.001
+X6$Z_lm_K[is.na(X6$Z_lm_K)] <- 0.001
+
+
+
+# Prepares one score table for the side-by-side join:
+#   1. ranks Z_lm_L and Z_lm_K into Rank_L / Rank_K
+#   2. sorts the rows by OrfRep in ascending order
+#   3. appends "_<tag>" to every column name so the columns stay distinguishable after the join
+rank_sort_and_tag <- function(scores, tag) {
+  scores$Rank_L <- rank(scores$Z_lm_L)
+  scores$Rank_K <- rank(scores$Z_lm_K)
+  scores <- scores[order(scores$OrfRep),]
+  names(scores) <- paste0(names(scores),"_",tag)
+  scores
+}
+
+X1 <- rank_sort_and_tag(X1,"X1")
+X2 <- rank_sort_and_tag(X2,"X2")
+X4 <- rank_sort_and_tag(X4,"X4")
+X5 <- rank_sort_and_tag(X5,"X5")
+X6 <- rank_sort_and_tag(X6,"X6")
+# colnames(X1)[1] <- "OrfRep"
+# colnames(X2)[1] <- "OrfRep"
+# colnames(X4)[1] <- "OrfRep"
+# colnames(X5)[1] <- "OrfRep"
+# colnames(X6)[1] <- "OrfRep"
+
+# Join the five prepared tables side by side. cbind() pairs the rows purely by position,
+# so the join is only meaningful when every table lists the same OrfRep values in the
+# same order; warn loudly when that does not hold instead of mixing rows silently.
+orf_keys <- list(X1$OrfRep_X1, X2$OrfRep_X2, X4$OrfRep_X4, X5$OrfRep_X5, X6$OrfRep_X6)
+if (!all(vapply(orf_keys[-1], identical, logical(1), orf_keys[[1]]))) {
+  warning("OrfRep columns differ between the input files; cbind() matches rows by position, so scores may be attached to the wrong ORF", call. = FALSE)
+}
+X <- cbind(X1,X2,X4,X5,X6)
+#print(dim(X))
+#print(paste(X$Gene_X1[2700],X$Gene_X2[2700],X$Gene_X4[2700],X$Gene_X5[2700],X$Gene_X6[2700]))
+#print(X[2700,])
+#X <- Reduce(function(x, y) merge(x, y,by ="OrfRep", all=TRUE), list(X1,X2,X4,X5,X6))
+#X <- merge(X1,X2,X4,X5,X6,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2","_X4","_X5","_X6"))
+
+# ORF key of the combined table: OrfRep of the first dataset without the replicate tags
+X$ORF <- X$OrfRep_X1
+
+#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/17_0516_RF_Interaction_Score_against_self/17_0706_Doxo_RFZscore_AllData.csv",header=TRUE)
+X$ORF <- gsub("_1","",x=X$ORF)
+X$ORF <- gsub("_2","",x=X$ORF)
+X$ORF <- gsub("_3","",x=X$ORF)
+X$ORF <- gsub("_4","",x=X$ORF)
+
+# Rows without a gene name fall back to their OrfRep value. The mask excludes NA so that
+# a missing gene name cannot abort the fill, and an all-FALSE mask is a harmless no-op.
+for (dataset_tag in c("X1","X2","X4","X5","X6")) {
+  gene_col <- paste0("Gene_",dataset_tag)
+  orf_col <- paste0("OrfRep_",dataset_tag)
+  unnamed <- !is.na(X[[gene_col]]) & X[[gene_col]] == ""
+  X[[gene_col]][unnamed] <- X[[orf_col]][unnamed]
+}
+#write.csv(file = paste(outputpath,"TEST.csv",sep=""),x = X)
+# 
+# #express the na data as 0.001
+# #X[is.na(X$Z_lm_L_X1),]$Z_lm_L_X1 <- 0.001
+# #X[is.na(X$Z_lm_L_X2),]$Z_lm_L_X2 <- 0.001
+# 
+# X$Overlap <- "No Effect"
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2,]$Overlap <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2,]$Overlap <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2,]$Overlap <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2,]$Overlap <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001,]$Overlap <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+# 
+# X$Overlap_K <- "No Effect"
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- "Deletion Enhancer Both")
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- "Deletion Suppressor Both")
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " only",sep=""))
+# try(X[X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " only",sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep=""))
+# try(X[X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep=""))
+# try(X[X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001,]$Overlap_K <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep=""))
+
+# X1_vals <- c(min(X$Z_lm_L_X1,na.rm = TRUE),max(X$Z_lm_L_X1,na.rm = TRUE))
+# X2_vals <- c(min(X$Z_lm_L_X2,na.rm = TRUE),max(X$Z_lm_L_X2,na.rm = TRUE))
+# 
+# X1_vals_K <- c(min(X$Z_lm_K_X1,na.rm = TRUE),max(X$Z_lm_K_X1,na.rm = TRUE))
+# X2_vals_K <- c(min(X$Z_lm_K_X2,na.rm = TRUE),max(X$Z_lm_K_X2,na.rm = TRUE))
+
+# Build the table behind the heatmaps: the ORF key, the gene name of the first dataset
+# and every Z_Shift / Z_lm column (K and L) of the five datasets that is present in X.
+heatmap_tags <- c("X1","X2","X4","X5","X6")
+heatmap_cols <- c("ORF","Gene_X1",
+                  paste0("Z_Shift_K_",heatmap_tags),paste0("Z_lm_K_",heatmap_tags),
+                  paste0("Z_Shift_L_",heatmap_tags),paste0("Z_lm_L_",heatmap_tags))
+X_heatmap <- X[colnames(X) %in% heatmap_cols]
+#print(colnames(X_heatmap))
+#break()
+
+# Positional reorder: ORF (22) and Gene (1) first, then for each dataset its 3rd and 4th
+# score column, then for each dataset its 1st and 2nd score column.
+# NOTE(review): this assumes exactly 22 selected columns, four score columns per dataset;
+# which pair is K and which is L depends on the column order of the input files - confirm.
+second_pairs <- as.vector(rbind(seq(4,20,by=4),seq(5,21,by=4)))
+first_pairs <- as.vector(rbind(seq(2,18,by=4),seq(3,19,by=4)))
+X_heatmap <- X_heatmap[,c(22,1,second_pairs,first_pairs)]
+
+# Swap the internal dataset tags for the user-supplied labels, one tag after the other
+heatmap_labels <- c(Name1,Name2,Name3,Name4,Name5)
+for (tag_idx in seq_along(heatmap_tags)) {
+  colnames(X_heatmap) <- gsub(pattern = heatmap_tags[tag_idx],replacement = heatmap_labels[tag_idx],colnames(X_heatmap))
+}
+
+colnames(X_heatmap)[2] <- "Gene"
+
+
+# Colour bin edges: steps of 2 from -12 to 12, with a single bin spanning -2 to 2
+colormapbreaks <- c(seq(-12,-2,by=2),seq(2,12,by=2))
+
+#Parent_Size (number of ORFs annotated to the parent term) selects the PDF page height,
+#the row label size and whether terms with only 1-2 genes get a page as well. A row
+#applies up to and including max_size, so every size falls into exactly one row.
+#(The former if-blocks overlapped at 30, 60, 100, 200, 500 and 1000: those sizes were
+#rendered twice and the second, smaller layout overwrote the first. The table keeps
+#that final result and renders once.)
+heatmap_layouts <- data.frame(
+  max_size    = c( 30,  60, 100, 200, 500, 1000, 2000, Inf),
+  page_height = c(  7,  10,  15,  20,  25,   30,   35,  45),
+  row_cex     = c(0.7, 0.7, 0.7, 0.7, 0.7,  0.6,  0.6, 0.5),
+  draw_small  = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
+)
+
+#Write one multi-page PDF with a heatmap page for every GO term in go_terms.
+#  go_terms    character vector of GO ids such as "GO:0006325"
+#  pdf_file    path of the PDF to create
+#  page_height page height handed to pdf() (the width is always 12)
+#  row_cex     cexRow handed to heatmap.2()
+#  draw_small  TRUE: terms with 1 or 2 genes are drawn too, without row dendrogram
+#Uses the script-level X_heatmap, Terms, GO2ALLORFs and colormapbreaks.
+plot_go_term_heatmaps <- function(go_terms, pdf_file, page_height, row_cex, draw_small){
+  pdf(file=pdf_file,width = 12, height = page_height, onefile = TRUE)
+  #close the device even if something other than heatmap.2() fails below
+  on.exit(dev.off(), add = TRUE)
+  for(GO_Term in go_terms){
+    GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2])
+    GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term)
+    if(length(GO_Term_Name) == 0){
+      #term missing from the Terms table: title the page with the GO id instead
+      GO_Term_Name <- as.character(GO_Term)
+    }
+    All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+    Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,]
+    gene_count <- nrow(Genes_Annotated_to_Term)
+    if(gene_count == 0 || (gene_count <= 2 && !draw_small)){
+      next
+    }
+    X0 <- as.matrix(Genes_Annotated_to_Term[,3:ncol(Genes_Annotated_to_Term)])
+    #try(): a failing heatmap (e.g. a matrix heatmap.2() rejects) only skips this page
+    try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = if(gene_count > 2) "row" else "none",
+                  cexCol = 0.7, cexRow = row_cex, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+  }
+}
+
+#One PDF per parent GO term listed in X3 (column 1 = GO id, column 2 = file name stem).
+for(s in seq_len(nrow(X3))){
+  GO_ID_Arg_loop <- as.character(X3[s,1])
+  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
+  #only make plots if get_descendants() returns at most 100 terms for the parent
+  if(length(GOTerm_parent) > 100){
+    next
+  }
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  if(Parent_Size < 2){
+    next
+  }
+  page_layout <- heatmap_layouts[which(Parent_Size <= heatmap_layouts$max_size)[1],]
+  plot_go_term_heatmaps(go_terms = GOTerm_parent,
+                        pdf_file = paste(outputpath,X3[s,2],".pdf",sep=""),
+                        page_height = page_layout$page_height,
+                        row_cex = page_layout$row_cex,
+                        draw_small = page_layout$draw_small)
+}
--- a/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_V2.R
+++ b/workflow/.old/apps/r/SSscripts/GO_list_All_ChildTerms_lmZscore_max100child_Heatmaps_V2.R
@@ -0,0 +1,622 @@
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+#library("plotly")
+#library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library(gplots)
+
+#command-line arguments (Rscript <script> arg1 ... arg8)
+Args <- commandArgs(TRUE)
+if (length(Args) < 8){
+  stop("expected 8 arguments: file1 name1 file2 name2 ontology.obo go_terms.tab GO_list.csv output_dir",call. = FALSE)
+}
+
+#Arg 1 is the first Z-score CSV (read into X1; OrfRep, Gene, Z_lm_* and Z_Shift_* columns are used below)
+input_file1 <- Args[1]
+
+#Arg 2 is the label that replaces "X1" in the heatmap column names
+Name1 <- Args[2]
+
+#Arg 3 is the second Z-score CSV (read into X2 and merged with X1 on OrfRep)
+input_file2 <- Args[3]
+
+#Arg 4 is the label that replaces "X2" in the heatmap column names
+Name2 <- Args[4]
+
+#Arg 5 is the www.geneontology.org/ontology/gene_ontology_edit.obo file - can download from this link
+ontology_obo_input <- Args[5]
+
+#Arg 6 is the go_terms.tab file
+GOtermstab_file <- Args[6]
+
+#Arg 7 is a CSV (with header) listing the parent GO terms to plot:
+#column 1 = numeric GO id (6325 for GO:0006325), column 2 = term name (used as the PDF file name)
+GO_ID_Arg <- Args[7]
+
+#Arg 8 is the directory to put the results into (created, with parents, if needed).
+#Output files are named paste(outputpath,<term>,".pdf",sep=""), so pass it with the trailing "/".
+subDir <- Args[8]
+
+if (!dir.exists(subDir)){
+  dir.create(subDir,recursive = TRUE)
+}
+
+#define the output path (eighth argument from Rscript)
+outputpath <- subDir
+
+
+
+#outputpath_X1_rank <- paste(outputpath,Name1,"_Rank/",sep="")
+#outputpath_X2_rank <- paste(outputpath,Name2,"_Rank/",sep="")
+
+
+# if (file.exists(outputpath_X1_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X1_rank))
+# }
+# 
+# if (file.exists(outputpath_X2_rank)){
+#   #outputpath <- subDir
+# } else {
+#   dir.create(file.path(outputpath_X2_rank))
+# }
+# 
+
+
+
+#theme elements for plots
+theme_Publication <- function(base_size=14, base_family="sans") {
+  # Publication-style ggplot2 theme: theme_foundation() plus the overrides
+  # below (bold centred title, black axis lines, light major grid, no minor
+  # grid, horizontal legend at the bottom).
+  bold <- function(...) element_text(face = "bold", ...)
+  no_outline <- element_rect(colour = NA)
+  overrides <- theme(
+    plot.title = bold(size = rel(1.2), hjust = 0.5),
+    text = element_text(),
+    panel.background = no_outline, plot.background = no_outline,
+    panel.border = no_outline,
+    axis.title = bold(size = rel(1)),
+    axis.title.y = element_text(angle = 90, vjust = 2),
+    axis.title.x = element_text(vjust = -0.2),
+    axis.text = element_text(),
+    axis.line = element_line(colour = "black"),
+    axis.ticks = element_line(),
+    panel.grid.major = element_line(colour = "#f0f0f0"),
+    panel.grid.minor = element_blank(),
+    legend.key = no_outline, legend.position = "bottom",
+    legend.direction = "horizontal", legend.key.size = unit(0.2, "cm"),
+    legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic"),
+    plot.margin = unit(c(10, 5, 5, 5), "mm"),
+    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+    strip.text = bold()
+  )
+  theme_foundation(base_size = base_size, base_family = base_family) + overrides
+}
+
+#Discrete fill scale using the "Publication" palette; ... is passed on to discrete_scale(). manual_pal is called as scales::manual_pal, so calling this no longer attaches scales as a side effect.
+scale_fill_Publication <- function(...){
+  publication_colours <- c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")
+  discrete_scale("fill","Publication",scales::manual_pal(values = publication_colours), ...)
+}
+
+#Discrete colour scale using the "Publication" palette; ... is passed on to discrete_scale(). manual_pal is namespace-qualified because none of the library() calls at the top of this script is for scales.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+
+# Variant of theme_Publication() with the legend to the right of the panel.
+#
+# Arguments:
+#   base_size    base font size, forwarded to theme_Publication()
+#   base_family  base font family, forwarded to theme_Publication()
+# Returns a ggplot2 theme object.
+#
+# Every element except the three legend settings below is inherited from
+# theme_Publication(), so the shared look is defined in one place only
+# (this body used to be a full copy of that theme):
+#
+#   element            theme_Publication   this theme
+#   legend.position    "bottom"            "right"
+#   legend.direction   "horizontal"        "vertical"
+#   legend.key.size    0.2 cm              0.5 cm
+#
+# theme_Publication() is looked up when this function is called, so it only
+# has to be defined somewhere in the script before the first plot is built.
+theme_Publication_legend_right <- function(base_size=14, base_family="sans") {
+  legend_on_right <- theme(
+    legend.position = "right",
+    legend.direction = "vertical",
+    legend.key.size = unit(0.5, "cm")
+  )
+  # adding a theme() to a theme replaces just the elements named in it
+  theme_Publication(base_size = base_size, base_family = base_family) +
+    legend_on_right
+}
+
+#Discrete fill scale using the "Publication" palette (second definition in this script, replacing the earlier one); manual_pal is namespace-qualified because this version never attaches scales.
+scale_fill_Publication <- function(...){
+  discrete_scale("fill","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#Discrete colour scale using the "Publication" palette (second definition in this script, replacing the earlier one); manual_pal is namespace-qualified because scales is not attached here.
+scale_colour_Publication <- function(...){
+  discrete_scale("colour","Publication",scales::manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)
+}
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/Compare_GTF_DOXO/"
+
+
+
+#Z-score table of the first screen (Arg 1); OrfRep, Gene, Z_lm_* and Z_Shift_* columns are used below
+X1 <- read.csv(input_file1,header = TRUE,stringsAsFactors = FALSE)
+
+#X1 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
+
+X2 <- read.csv(input_file2,header = TRUE,stringsAsFactors = FALSE)
+
+#X2 <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLEG.csv",stringsAsFactors=FALSE,header=TRUE)
+
+#Parent GO terms to plot (Arg 7): column 1 becomes a zero-padded "GO:nnnnnnn" id, column 2 (the PDF file name) loses spaces and slashes
+X3 <- read.csv(GO_ID_Arg,header = TRUE,stringsAsFactors = FALSE)
+X3[,1] <- paste0("GO:",formatC(X3[,1],flag = "0",width = 7))
+X3[,2] <- gsub(pattern = "[ /]",replacement = "_",x = X3[,2])
+
+#Name1 <- "DOXO_HLD"
+#Name2 <- "DOXO_HLEG"
+
+
+#www.geneontology.org/ontology/gene_ontology_edit.obo file
+Ontology <- get_ontology(file = ontology_obo_input,extract_tags = "minimal",propagate_relationships = "is_a")
+
+#GO id -> ORFs lookup as a plain list (the map comes from org.Sc.sgd.db)
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+
+
+#Gene_Association is the gene association to GO term file
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+
+#Terms is the GO term list (Arg 6): no header row, four columns named here
+Terms <- read.delim(GOtermstab_file,quote = "",header = FALSE,col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+
+X1$ORF <- X1$OrfRep
+X1$ORF <- gsub("_1","",x=X1$ORF)
+X1$ORF <- gsub("_2","",x=X1$ORF)
+X1$ORF <- gsub("_3","",x=X1$ORF)
+X1$ORF <- gsub("_4","",x=X1$ORF)
+#Label by Z_lm_L: NA = "No Growth", >= 2 enhancer, <= -2 suppressor. Assigning into the column vector (not into a row subset of the data frame) is a no-op instead of an error when no row matches.
+X1$Score_L <- "No Effect"
+X1$Score_L[is.na(X1$Z_lm_L)] <- "No Growth"
+X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2] <- "Deletion Enhancer"
+X1$Score_L[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2] <- "Deletion Suppressor"
+#Same for Z_lm_K, where the labels are reversed (>= 2 suppressor, <= -2 enhancer).
+X1$Score_K <- "No Effect"
+X1$Score_K[is.na(X1$Z_lm_K)] <- "No Growth"
+X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2] <- "Deletion Suppressor"
+X1$Score_K[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+
+
+
+X2$ORF <- X2$OrfRep
+X2$ORF <- gsub("_1","",x=X2$ORF)
+X2$ORF <- gsub("_2","",x=X2$ORF)
+X2$ORF <- gsub("_3","",x=X2$ORF)
+X2$ORF <- gsub("_4","",x=X2$ORF)
+#Label by Z_lm_L: NA = "No Growth", >= 2 enhancer, <= -2 suppressor. Assigning into the column vector (not into a row subset of the data frame) is a no-op instead of an error when no row matches.
+X2$Score_L <- "No Effect"
+X2$Score_L[is.na(X2$Z_lm_L)] <- "No Growth"
+X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2] <- "Deletion Enhancer"
+X2$Score_L[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2] <- "Deletion Suppressor"
+#Same for Z_lm_K, where the labels are reversed (>= 2 suppressor, <= -2 enhancer).
+X2$Score_K <- "No Effect"
+X2$Score_K[is.na(X2$Z_lm_K)] <- "No Growth"
+X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2] <- "Deletion Suppressor"
+X2$Score_K[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2] <- "Deletion Enhancer"
+
+#express the na data as 0.001 in X1 for K and L (column-vector assignment: nothing happens, instead of an error, when there is no NA)
+X1$Z_lm_L[is.na(X1$Z_lm_L)] <- 0.001
+X1$Z_lm_K[is.na(X1$Z_lm_K)] <- 0.001
+
+#X1[is.na(X1$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X1[is.na(X1$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+#express the na data as 0.001 in X2
+X2$Z_lm_L[is.na(X2$Z_lm_L)] <- 0.001
+X2$Z_lm_K[is.na(X2$Z_lm_K)] <- 0.001
+
+#X2[is.na(X2$Z_Shift_L),]$Z_Shift_L <- 0.0001
+#X2[is.na(X2$Z_Shift_K),]$Z_Shift_K <- 0.0001
+
+
+X1$Rank_L <- rank(X1$Z_lm_L)
+X1$Rank_K <- rank(X1$Z_lm_K)
+#(ranks are taken after NA was replaced by 0.001 above, so missing scores rank as 0.001)
+X2$Rank_L <- rank(X2$Z_lm_L)
+X2$Rank_K <- rank(X2$Z_lm_K)
+#full outer join on OrfRep; columns present in both tables get the suffix _X1 / _X2
+X <- merge(X1,X2,by ="OrfRep",all=TRUE,suffixes = c("_X1","_X2"))
+X$ORF <- X$OrfRep
+
+#ORF id = OrfRep with any "_1" ... "_4" removed (presumably replicate markers - TODO confirm)
+X$ORF <- gsub("_1","",x=X$ORF)
+X$ORF <- gsub("_2","",x=X$ORF)
+X$ORF <- gsub("_3","",x=X$ORF)
+X$ORF <- gsub("_4","",x=X$ORF)
+#blank gene names fall back to OrfRep; which() skips the NA names of genes found in one table only (they made the old try()-wrapped row assignment fail silently)
+if("Gene_X1" %in% colnames(X)) X$Gene_X1[which(X$Gene_X1 == "")] <- X$OrfRep[which(X$Gene_X1 == "")]
+if("Gene_X2" %in% colnames(X)) X$Gene_X2[which(X$Gene_X2 == "")] <- X$OrfRep[which(X$Gene_X2 == "")]
+
+
+#Pairwise label per gene from the L Z-scores of both screens. Rules run in order, so a later
+#match overwrites an earlier one. which() drops comparisons that are NA (gene present in only
+#one screen after the all=TRUE merge; such rows keep their label) and makes an empty match a
+#no-op. 0.001 is the value substituted earlier for a missing Z-score ("No Growth").
+X$Overlap <- "No Effect"
+X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 >= 2)] <- "Deletion Enhancer Both"
+X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 <= -2)] <- "Deletion Suppressor Both"
+X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= 2)] <- paste("Deletion Enhancer ",Name1, " only",sep="")
+X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= -2)] <- paste("Deletion Suppressor ",Name1, " only",sep="")
+X$Overlap[which(X$Z_lm_L_X1 <= 2 & X$Z_lm_L_X2 >= 2)] <- paste("Deletion Enhancer ",Name2, " only",sep="")
+X$Overlap[which(X$Z_lm_L_X1 >= -2 & X$Z_lm_L_X2 <= -2)] <- paste("Deletion Suppressor ",Name2, " only",sep="")
+X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 <= -2)] <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep="")
+X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 >= 2)] <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep="")
+X$Overlap[which(X$Z_lm_L_X1 >= 2 & X$Z_lm_L_X2 == 0.001)] <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep="")
+X$Overlap[which(X$Z_lm_L_X1 <= -2 & X$Z_lm_L_X2 == 0.001)] <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep="")
+X$Overlap[which(X$Z_lm_L_X2 >= 2 & X$Z_lm_L_X1 == 0.001)] <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep="")
+X$Overlap[which(X$Z_lm_L_X2 <= -2 & X$Z_lm_L_X1 == 0.001)] <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep="")
+#Same labelling from the K Z-scores, where <= -2 is the enhancer and >= 2 the suppressor.
+X$Overlap_K <- "No Effect"
+X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 <= -2)] <- "Deletion Enhancer Both"
+X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 >= 2)] <- "Deletion Suppressor Both"
+X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= -2)] <- paste("Deletion Enhancer ",Name1, " only",sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= 2)] <- paste("Deletion Suppressor ",Name1, " only",sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 >= -2 & X$Z_lm_K_X2 <= -2)] <- paste("Deletion Enhancer ",Name2, " only",sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 <= 2 & X$Z_lm_K_X2 >= 2)] <- paste("Deletion Suppressor ",Name2, " only",sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 >= 2)] <- paste("Deletion Enhancer ",Name1, " Deletion Suppressor ",Name2,sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 <= -2)] <- paste("Deletion Suppressor ",Name1, " Deletion Enhancer ",Name2,sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 <= -2 & X$Z_lm_K_X2 == 0.001)] <- paste("Deletion Enhancer ",Name1, " No Growth ",Name2,sep="")
+X$Overlap_K[which(X$Z_lm_K_X1 >= 2 & X$Z_lm_K_X2 == 0.001)] <- paste("Deletion Suppressor ",Name1, " No Growth ",Name2,sep="")
+X$Overlap_K[which(X$Z_lm_K_X2 <= -2 & X$Z_lm_K_X1 == 0.001)] <- paste("Deletion Enhancer ",Name2, " No Growth ",Name1,sep="")
+X$Overlap_K[which(X$Z_lm_K_X2 >= 2 & X$Z_lm_K_X1 == 0.001)] <- paste("Deletion Suppressor ",Name2, " No Growth ",Name1,sep="")
+
+#observed (min, max) of the lm Z-scores per screen, for L and for K
+X1_vals <- range(X$Z_lm_L_X1,na.rm = TRUE)
+X2_vals <- range(X$Z_lm_L_X2,na.rm = TRUE)
+X1_vals_K <- range(X$Z_lm_K_X1,na.rm = TRUE)
+X2_vals_K <- range(X$Z_lm_K_X2,na.rm = TRUE)
+
+#columns shown in the heatmaps: ORF id, gene label and the K / L Z-scores (Z_Shift, Z_lm) of both screens
+X_heatmap <- X[colnames(X) %in% c("ORF","Gene_X1","Z_Shift_K_X1","Z_lm_K_X1","Z_Shift_K_X2","Z_lm_K_X2","Z_Shift_L_X1","Z_lm_L_X1","Z_Shift_L_X2","Z_lm_L_X2")]
+X_heatmap <- X_heatmap[,c(10,1,4,5,8,9,2,3,6,7)] #positional: id column first, label second, data from column 3; exact order depends on the input CSV layout - TODO confirm
+names(X_heatmap) <- gsub("X1",Name1,names(X_heatmap))
+names(X_heatmap) <- gsub("X2",Name2,names(X_heatmap))
+names(X_heatmap)[2] <- "Gene"
+#12 break points = 11 colour bins; values between -2 and 2 share the middle bin
+colormapbreaks <- c(-12,-10,-8,-6,-4,-2,2,4,6,8,10,12)
+
+for(s in 1:dim(X3)[1]){
+  #Ontology <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "minimal")
+  #Ontology_Everything <- get_ontology(file="Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",propagate_relationships = "is_a",extract_tags = "everything")
+  #GO_ID_Arg <- "GO:0006325"
+  GO_ID_Arg_loop <- as.character(X3[s,1])
+  GOTerm_parent <- get_descendants(Ontology,roots = GO_ID_Arg_loop)
+  #GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
+  #only make plots if get_descendants() returns at most 100 terms for the parent
+  if(length(GOTerm_parent) > 100){
+    #print(length(GOTerm_parent))
+    next()
+  }
+  
+  
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  if(Parent_Size < 2){
+    next()
+  }
+  if(Parent_Size > 2000){ #parents with more than 2000 entries in GO2ALLORFs: page height 45
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 45, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+  if(Parent_Size > 1000 && Parent_Size <= 2000){ #lower bound is exclusive: exactly 1000 is handled by the 500-1000 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 35, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size > 500 && Parent_Size <= 1000){ #lower bound is exclusive: exactly 500 is handled by the 200-500 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 30, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size > 200 && Parent_Size <= 500){ #lower bound is exclusive: exactly 200 is handled by the 100-200 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 25, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size > 100 && Parent_Size <= 200){ #lower bound is exclusive: exactly 100 is handled by the 60-100 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 20, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size > 60 && Parent_Size <= 100){ #lower bound is exclusive: exactly 60 is handled by the 30-60 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 15, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ #1 or 2 matching rows: same heatmap call but with dendrogram = "none"
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size > 30 && Parent_Size <= 60){ #lower bound is exclusive: exactly 30 is handled by the 3-30 branch only, so its PDF is not rendered twice
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 10, onefile = TRUE)
+    for(i in seq_along(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ #1 or 2 matching rows: same heatmap call but with dendrogram = "none"
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size >= 3 && Parent_Size <= 30){ #parents with 3 to 30 entries in GO2ALLORFs: page height 7
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                  Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                  dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                  breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                  ylab = "Gene",
+                  cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                  keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                  na.color="red", col=brewer.pal(11,"PuOr"),
+                  main=GO_Term_Name, 
+                  #ColSideColors=ev_repeat,
+                  labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ #1 or 2 matching rows: same heatmap call but with dendrogram = "none"
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  if(Parent_Size == 2){ #parents with exactly 2 entries in GO2ALLORFs: page height 7
+    pdf(file=paste(outputpath,X3[s,2],".pdf",sep=""),width = 12, height = 7, onefile = TRUE)
+    for(i in 1:length(GOTerm_parent)){ #loop over every term returned by get_descendants for this parent
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term),"\\:",2)[,2]) #numeric part of the GO ID (text after the colon)
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num,]$GO_Term) #name looked up in Terms; used as the plot title
+      #Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term,]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term,] #rows of X_heatmap whose ORF is listed for this term
+      X0 <- as.matrix(Genes_Annotated_to_Term[,3:dim(Genes_Annotated_to_Term)[2]]) #columns 3 onward are the plotted values; column 2 (Gene) supplies the row labels
+      if(dim(Genes_Annotated_to_Term)[1] > 2){ #more than 2 matching rows: heatmap with a row dendrogram
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+      if(dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0){ #1 or 2 matching rows: same heatmap call but with dendrogram = "none"
+        try(heatmap.2(x=X0,
+                      Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
+                      dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+                      breaks=colormapbreaks, symbreaks=FALSE, colsep = c(2,4,6), sepcolor= "white", offsetCol = 0.1,
+                      ylab = "Gene",
+                      cellnote = round(X0,digits=0), notecex = 0.5, key=TRUE,
+                      keysize=0.5, trace="none", density.info=c("none"), margins=c(10,8),
+                      na.color="red", col=brewer.pal(11,"PuOr"),
+                      main=GO_Term_Name, 
+                      #ColSideColors=ev_repeat,
+                      labRow=as.character(Genes_Annotated_to_Term$Gene)))
+      }
+    }
+    dev.off()
+  }
+  
+
+  
+  
+  
+}
--- a/workflow/.old/apps/r/SSscripts/ScoreAllGOTerms_From_Z_lm_V2.R
+++ b/workflow/.old/apps/r/SSscripts/ScoreAllGOTerms_From_Z_lm_V2.R
@@ -0,0 +1,239 @@
+######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
+
+#Run using Rscript from the command line
+#Rscript 18_0119_ScoreAllGOTerms_From.R InteractionScores.csv go_terms.tab gene_association.sgd output_directory
+
+#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
+#in a Q-HTCP screen
+#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
+
+#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
+#requires the go_terms.tab and gene_association.sgd
+
+#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
+#Output files will include all GO terms, the non-redundant GO terms, and the non-redundant terms whose average Z_lm_L or Z_lm_K score is >= 2 or <= -2 (each also restricted to terms with more than 2 genes)
+#Various other output files will give some statistics on the size of GO terms and the average scores, the SD, etc.
+
+library("stringr")
+library("org.Sc.sgd.db")
+library("plyr")
+
+
+#build in command args to apply this code to a given !!results sheet
+Args <- commandArgs(TRUE)
+
+#Arg 1 is the ZScores_Interaction.csv
+input_file <- Args[1]
+
+#arg 2 is the go_terms.tab
+#https://downloads.yeastgenome.org/curation/literature/go_terms.tab
+SGD_Terms_file <- Args[2]
+
+#arg 3 is the gene_association.sgd
+#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
+SGD_features_file <- Args[3]
+
+#arg 4 is the directory to put the results into (and create that directory if needed)
+subDir <- Args[4]
+
+if (file.exists(subDir)){
+  outputpath <- subDir
+} else {
+  dir.create(file.path(subDir), recursive = TRUE) #recursive so that missing parent directories are created as well
+}
+#output file names are built with paste(outputpath, name, sep=""), so arg 4 has to end with "/" for files to land inside the directory
+#define the output path (as fourth argument from Rscript)
+outputpath <- Args[4]
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
+
+#X is the Z score data
+#should have ORF in first column, Gene in second column, and then Scores to be averaged in the next columns (3:max)
+#make sure NA values are included (don't use 0.001) - 0.001 is needed for clustering but this will incorrectly calculate average GTF scores
+X <- read.csv(file = input_file,stringsAsFactors=FALSE,header = TRUE)
+
+#X <- read.csv("Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0115_Doxo_Clust_SumZscore/ZScores_Interaction_HLD.csv",stringsAsFactors=FALSE,header=TRUE)
+#accept "OrfRep" as an alternative header for the first column and rename it to ORF
+if(colnames(X)[1] == "OrfRep"){
+  colnames(X)[1] <- "ORF"
+}
+
+#Terms is the GO term list (go_terms.tab), read without a header row: GO_ID, term name, aspect and definition
+Terms <- read.delim(file = SGD_Terms_file,header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/go_terms.tab",header=FALSE,quote = "",col.names = c("GO_ID","GO_Term","GO_Aspect","GO_Term_Definition"))
+
+#SGD features (not needed)
+#Terms <- read.delim("Documents/Hartman_Lab/SGD_Downloads/SGD_features.tab",header=FALSE,quote = "",col.names = c("SGD_ID","Feature_Type","Qualifier","ORF","Gene","Alias","Parent_Feature","Secondardy_SGD_ID","Chromosome","Start_Coordinate","Stop_Coordinate","Strand","Genetic_Position","Coordinate_Position","Sequence_Version","Description"))
+
+#list keyed by GO ID; each element holds the ORFs that org.Sc.sgdGO2ALLORFS maps to that term
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+#Gene_Association is the gene association to GO term file; skip=8 assumes exactly 8 header lines precede the data - TODO confirm for the file version in use
+Gene_Association <- read.delim(SGD_features_file,skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+#Gene_Association <- read.delim("Documents/Hartman_Lab/SGD_Downloads/gene_association.sgd",skip=8,header=FALSE,quote="",col.names = c("Database","Database_Object_ID","Database_Object_Symbol","NOT","GO_ID","Database_Reference","Evidence","With_or_From","Aspect","Database_Object_Name","Database_Object_Synonym","Database_Object_Type","taxon","Date","Assigned_By","OtherInfo","Empty"))
+
+#the ORF is taken as the text before the first "|" of the synonym column
+Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym),"\\|",2)[,1]
+#numeric part of the GO ID (text after the colon), so it can be matched against Terms$GO_ID
+Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID),"\\:",2)[,2])
+
+#get all unique GO terms
+GO_Terms <- unique(Gene_Association$GO_ID)
+
+#column names of the input file (Col_Names_X is not referenced again in this script)
+Col_Names_X <- colnames(X)
+#zero-row data.frame with the same columns as the input file
+GO_Term_Averages <- X[0,]
+#add one all-NA row per GO term; the scoring loop fills these rows in by index
+GO_Term_Averages[1:length(GO_Terms),] <- NA
+
+#change the first and second col names to GO_ID and Term
+colnames(GO_Term_Averages)[1] <- "GO_ID"
+colnames(GO_Term_Averages)[2] <- "Term"
+
+#create new columns for Ontology, number genes (used to calculate the avg score), all possible genes in the GO term, and print genes/ORFs used
+GO_Term_Averages$Ontology <- NA
+GO_Term_Averages$NumGenes <- NA
+GO_Term_Averages$AllPossibleGenes <- NA
+GO_Term_Averages$Genes <- NA
+GO_Term_Averages$ORFs <- NA
+
+#create a data.frame for the standard deviation info, built the same way as GO_Term_Averages (one all-NA row per GO term)
+GO_Term_SD <- X[0,]
+GO_Term_SD[1:length(GO_Terms),] <- NA
+
+colnames(GO_Term_SD)[1] <- "GO_ID"
+colnames(GO_Term_SD)[2] <- "Term"
+
+#GO_Term_SD$Ontology <- NA
+#GO_Term_SD$NumGenes <- NA
+#GO_Term_SD$AllPossibleGenes <- NA
+
+
+#Loop for each GO term to get the average and the SD of every score column of X
+for(i in seq_along(GO_Terms)){ #seq_along runs zero times for an empty GO_Terms (1:length() would count 1,0)
+  #get the GO_Term
+  ID <- GO_Terms[i]
+  
+  #Get data.frame for all genes associated to the GO Term
+  ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]
+  #get a vector of the ORFs listed for this term in GO2ALLORFs
+  ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
+  if(length(unique(ID_AllGenes_vector)) > 4000){ #skip terms with more than 4000 ORFs; row i stays all NA
+    next()
+  }
+  #get the GO term character description where numeric Terms ID matches GO_Term's ID
+  GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1])
+  
+  #get the Z scores for all genes in the GO_ID
+  Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
+  
+  #get the Gene names and ORFs for the term
+  GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ")
+  GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ")
+  
+  #dataframe to report the averages for a GO term
+  
+  #get the GO ID
+  GO_Term_Averages$GO_ID[i] <- as.character(ID)
+  
+  #get the term name
+  GO_Term_Averages$Term[i] <- GO_Description_Term
+  
+  
+  #get total number of genes annotated to the Term that we have in our library
+  GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
+  
+  #get total number of genes annotated to the Term in SGD
+  GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
+  
+  #get the ontology of the term
+  GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
+  
+  #calculate the average score for every column (columns 3..ncol(X); empty when X has fewer than 3 columns)
+  for(j in seq_len(ncol(X))[-(1:2)]){
+    GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE)
+    #GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
+  }
+  
+  #also calculate same values for the SD
+  GO_Term_SD$GO_ID[i] <- as.character(ID)
+  
+  #get the term name
+  GO_Term_SD$Term[i] <- GO_Description_Term
+  
+  
+  
+  #calculate column scores for SD (same columns as the averages)
+  for(j in seq_len(ncol(X))[-(1:2)]){
+    GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE)
+    #GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
+  }
+  
+  
+  
+  
+}
+
+#tag every column with the statistic it holds: _Avg in the averages table, _SD in the SD table
+names(GO_Term_Averages) <- paste0(names(GO_Term_Averages),"_Avg")
+names(GO_Term_SD) <- paste0(names(GO_Term_SD),"_SD")
+
+#put the two tables side by side in one data.frame
+X2 <- cbind(GO_Term_Averages,GO_Term_SD)
+#then arrange the columns alphabetically by name
+X2 <- X2[,order(colnames(X2))]
+#keep only the GO terms that have a Z_lm_L average
+X2 <- X2[!is.na(X2$Z_lm_L_Avg), , drop = FALSE]
+
+#write every remaining GO term
+write.csv(X2,file=paste0(outputpath,"Average_GOTerms_All.csv"),row.names=FALSE)
+
+
+#X3 is X2 with the same Z_lm_L_Avg NA filter applied once more
+X3 <- X2[!is.na(X2$Z_lm_L_Avg), , drop = FALSE]
+
+#identify redundant GO terms: terms that share both their Z_lm_K average and their gene list
+
+#Y starts as a zero-row copy of X3 so that it exists even when X3 has no rows
+Y <- X3[0,]
+for(i in seq_len(nrow(X3))){
+  #loop through each GO term - get term
+  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
+  #get the row(s) of X3 for this term
+  X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID,]
+  #get every row whose Z_lm_K_Avg equals this term's Z_lm_K_Avg
+  #match on that column only: %in% against the whole X3_Temp data.frame would compare
+  #against the character form of every column and could pick up unrelated rows
+  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp$Z_lm_K_Avg,]
+  if(length(X3_Temp2[,1]) > 1){
+    if(length(unique(X3_Temp2$Genes_Avg)) == 1){
+      #same score and same gene list: the terms are redundant, keep only the first one
+      X3_Temp2 <- X3_Temp2[1,]
+    }
+
+  }
+  
+  #append this term's row(s); rbind ignores the zero-row starting value of Y
+  Y <- rbind(Y,X3_Temp2)
+}
+
+Y1 <- Y[!duplicated(Y), , drop = FALSE]
+
+#all non-redundant terms, before any score cutoff
+write.csv(Y1,file=paste0(outputpath,"Average_GOTerms_All_NonRedundantTerms.csv"),row.names = FALSE)
+#terms whose average Z_lm_L is <= -2 or >= 2; rows without an L average are then removed
+Y2 <- Y1[Y1$Z_lm_L_Avg <= -2 | Y1$Z_lm_L_Avg >= 2,]
+Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg),]
+write.csv(Y2,file=paste0(outputpath,"Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"),row.names = FALSE)
+#of those, only the terms with NumGenes_Avg above 2
+Y3 <- Y2[Y2$NumGenes_Avg > 2,]
+write.csv(Y3,file=paste0(outputpath,"Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"),row.names = FALSE)
+#the same two cutoffs applied to the average Z_lm_K
+Y4 <- Y1[Y1$Z_lm_K_Avg <= -2 | Y1$Z_lm_K_Avg >= 2,]
+Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg),]
+write.csv(Y4,file=paste0(outputpath,"Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"),row.names = FALSE)
+
+Y5 <- Y4[Y4$NumGenes_Avg > 2,]
+write.csv(Y5,file=paste0(outputpath,"Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"),row.names = FALSE)
--- a/workflow/.old/apps/r/SSscripts/cmd_Doxo_SumZScore_Z_lm_Interaction_drug_all_rm_onedrug_V4_yor1_with_Dox_validationedit.R
+++ b/workflow/.old/apps/r/SSscripts/cmd_Doxo_SumZScore_Z_lm_Interaction_drug_all_rm_onedrug_V4_yor1_with_Dox_validationedit.R
--- a/workflow/.old/apps/r/SSscripts/cmd_ScoreAllGOTerms_From_Z_lm_V2.R
+++ b/workflow/.old/apps/r/SSscripts/cmd_ScoreAllGOTerms_From_Z_lm_V2.R
@@ -0,0 +1,239 @@
+######This code is to generate average GO term scores for all gene deletions from Zscores for a Q-HTCP screen
+
+#Run using Rscript from the command line
+#Rscript cmd_ScoreAllGOTerms_From_Z_lm_V2.R ZScores_Interaction.csv go_terms.tab gene_association.sgd output_directory
+
+#requires an input ZScores_Interaction.csv file generated by SumZscore_Interaction.R after applying code to generate average Z score for each deletion
+#in a Q-HTCP screen
+#requires various packages as called below (need bioconductor installed to use "org.Sc.sgd.db")
+
+#also requires some files from SGD (https://www.yeastgenome.org/) and can be downloaded from their website
+#requires the go_terms.tab and gene_association.sgd
+
+#Script will take ZScores_Interaction.csv file and calculate the average Zscore for every GO term
+#Output files include all GO terms, the non-redundant GO terms, and the non-redundant terms whose average Z_lm_L or Z_lm_K score is at least 2 or at most -2
+#Further output files restrict those to terms with more than 2 genes; each file also reports the number of genes per GO term and the SD of every score
+
+library("stringr")
+library("org.Sc.sgd.db")
+library("plyr")
+
+
+#Read the four positional command-line arguments passed after the script name
+Args <- commandArgs(TRUE)
+if (length(Args) < 4) {
+  stop("Usage: Rscript <script> ZScores_Interaction.csv go_terms.tab gene_association.sgd output_directory", call. = FALSE)
+}
+
+#Arg 1 is the ZScores_Interaction.csv
+input_file <- Args[1]
+#arg 2 is the go_terms.tab
+#https://downloads.yeastgenome.org/curation/literature/go_terms.tab
+SGD_Terms_file <- Args[2]
+#arg 3 is the gene_association.sgd
+#https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
+SGD_features_file <- Args[3]
+#arg 4 is the directory to put the results into (created below if it does not exist)
+subDir <- Args[4]
+
+#create the output directory, including any missing parent directories
+if (!dir.exists(subDir)) {
+  dir.create(subDir, recursive = TRUE)
+}
+
+#output file names are appended directly to outputpath, so make sure it ends with a
+#path separator; without one the files would be written beside the directory, not into it
+outputpath <- if (grepl("[/\\\\]$", subDir)) subDir else paste0(subDir, "/")
+
+#outputpath <- "Documents/Hartman_Lab/Doxorubicin_and_Genistein_Experiments/18_0119_GTF_Averages_Doxo/HLD_GenomeWide/"
+
+#Z score table: ORF in column 1, Gene in column 2, scores to be averaged in columns 3 onward
+#missing scores must be real NAs; the 0.001 placeholder used for clustering would skew the averages
+X <- read.csv(input_file, header = TRUE, stringsAsFactors = FALSE)
+
+#the first column may be labelled "OrfRep"; rename it so later lookups can use X$ORF
+if (names(X)[1] == "OrfRep") {
+  names(X)[1] <- "ORF"
+}
+
+#GO term list (go_terms.tab): no header row, tab-delimited, quote characters read literally
+Terms <- read.delim(
+  SGD_Terms_file, header = FALSE, quote = "",
+  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
+)
+
+#all ORFs associated with each GO term, as a list built from org.Sc.sgdGO2ALLORFS
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+#gene-to-GO-term association table; the first 8 lines of the file are skipped before parsing
+Gene_Association <- read.delim(
+  SGD_features_file, skip = 8, header = FALSE, quote = "",
+  col.names = c("Database", "Database_Object_ID", "Database_Object_Symbol", "NOT", "GO_ID", "Database_Reference",
+                "Evidence", "With_or_From", "Aspect", "Database_Object_Name", "Database_Object_Synonym",
+                "Database_Object_Type", "taxon", "Date", "Assigned_By", "OtherInfo", "Empty")
+)
+
+#ORF name for each association row: the text before the first "|" of the synonym field
+Gene_Association$ORF <- str_split_fixed(
+  as.character(Gene_Association$Database_Object_Synonym), "\\|", 2
+)[, 1]
+#numeric GO ID for matching: the text after the first ":" of the GO ID, as an integer
+Gene_Association$GO_ID_Numeric <- as.integer(
+  str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)[, 2]
+)
+
+#one entry per distinct GO term in the association table
+GO_Terms <- unique(Gene_Association$GO_ID)
+
+#column names of the input file, kept as a character vector
+Col_Names_X <- colnames(X)
+
+#averages table: the columns of the input file with one all-NA row per GO term
+GO_Term_Averages <- X[0, ]
+GO_Term_Averages[1:length(GO_Terms), ] <- NA
+#the first two columns now carry the GO ID and the term name
+colnames(GO_Term_Averages)[1:2] <- c("GO_ID", "Term")
+
+#extra columns of the averages table:
+#  Ontology         - ontology of the term
+#  NumGenes         - number of genes used to calculate the avg score
+#  AllPossibleGenes - all possible genes in the GO term
+#  Genes, ORFs      - the genes/ORFs used
+GO_Term_Averages$Ontology <- NA
+GO_Term_Averages$NumGenes <- NA
+GO_Term_Averages$AllPossibleGenes <- NA
+GO_Term_Averages$Genes <- NA
+GO_Term_Averages$ORFs <- NA
+
+#standard deviation table: built the same way, without the extra columns
+GO_Term_SD <- X[0, ]
+GO_Term_SD[1:length(GO_Terms), ] <- NA
+#same renaming of the first two columns as in the averages table
+colnames(GO_Term_SD)[1:2] <- c("GO_ID", "Term")
+
+#Loop for each GO term to get an average and SD for every score column (e.g. the L and K Z scores)
+#
+#For term i the loop fills row i of GO_Term_Averages (GO ID, description, ontology, gene counts,
+#gene/ORF lists, column means) and row i of GO_Term_SD (GO ID, description, column SDs).
+#Rows of terms that are skipped stay all-NA.
+#Uses X, Terms, Gene_Association, GO2ALLORFs and GO_Terms as prepared earlier in the script.
+
+#score columns are every column after ORF and Gene. seq_len() yields an empty set when the
+#input has no score columns, whereas 3:length(X[1,]) would count down and index a missing column
+score_cols <- seq_len(ncol(X))[-(1:2)]
+
+#seq_along() does not iterate over an empty term list, whereas 1:length() would visit 1 and 0
+for(i in seq_along(GO_Terms)){
+  #get the GO_Term
+  ID <- GO_Terms[i]
+  #character form of the ID: used as the lookup key and stored in both result tables
+  ID_chr <- as.character(ID)
+
+  #association rows for this term (used for the description lookup and the ontology)
+  ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID,]
+  #all ORFs listed for the term in GO2ALLORFs (NULL when the term is not in that list)
+  ID_AllGenes_vector <- as.vector(GO2ALLORFs[ID_chr][[1]])
+  #number of distinct ORFs for the term, reused for the size check and AllPossibleGenes
+  NumPossible <- length(unique(ID_AllGenes_vector))
+
+  #skip terms that cover more than 4000 ORFs
+  if(NumPossible > 4000){
+    next()
+  }
+
+  #term description: GO_Term of the first Terms row whose GO_ID matches the numeric ID
+  GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric,]$GO_Term[1])
+
+  #get the Z scores for all genes in the GO_ID
+  Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector,]
+
+  #GO ID and term name in the averages table
+  GO_Term_Averages$GO_ID[i] <- ID_chr
+  GO_Term_Averages$Term[i] <- GO_Description_Term
+
+  #same identifiers in the SD table
+  GO_Term_SD$GO_ID[i] <- ID_chr
+  GO_Term_SD$Term[i] <- GO_Description_Term
+
+  #get the Gene names and ORFs for the term
+  GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene),collapse=" | ")
+  GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF),collapse=" | ")
+
+  #get total number of genes annotated to the Term that we have in our library
+  GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
+
+  #get total number of ORFs listed for the Term in GO2ALLORFs
+  GO_Term_Averages$AllPossibleGenes[i] <- NumPossible
+
+  #get the ontology of the term (Aspect of its first association row)
+  GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
+
+  #calculate the average and the SD for every score column, ignoring NAs
+  #(a term with no scored genes gets NaN as its average and NA as its SD)
+  for(j in score_cols){
+    GO_Term_Averages[i,j] <- mean(Zscores_For_ID[,j],na.rm = TRUE)
+    GO_Term_SD[i,j] <- sd(Zscores_For_ID[,j],na.rm = TRUE)
+  }
+}
+
+#tag every column with its statistic: "_Avg" for the averages table, "_SD" for the SD table
+names(GO_Term_Averages) <- paste0(names(GO_Term_Averages), "_Avg")
+names(GO_Term_SD) <- paste0(names(GO_Term_SD), "_SD")
+
+#bind the two tables side by side, then sort the columns alphabetically by name
+#(GO_ID and Term are present in both tables, once with each suffix)
+X2 <- cbind(GO_Term_Averages, GO_Term_SD)
+X2 <- X2[, order(names(X2))]
+
+#keep only the terms that have an average Z_lm_L score
+X2 <- X2[!is.na(X2$Z_lm_L_Avg), ]
+
+#write the full table of GO term averages and SDs
+write.csv(X2, file = paste0(outputpath, "Average_GOTerms_All.csv"), row.names = FALSE)
+
+#X3 is the copy used for the redundancy filter; rows with a missing Z_lm_L average are excluded
+X3 <- X2[!is.na(X2$Z_lm_L_Avg), ]
+
+#identify redundant GO terms
+#Terms that share the same average Z_lm_K score and list exactly the same genes are treated
+#as redundant: only the first of them is kept. All other terms are kept unchanged.
+
+#one slot per term, filled in the loop and bound together once afterwards
+Y_parts <- vector("list", nrow(X3))
+#seq_len() does not iterate when X3 has no rows, whereas 1:length() would visit 1 and 0
+for(i in seq_len(nrow(X3))){
+  #loop through each GO term - get term and its K_Avg value
+  GO_term_ID <- as.character(X3$GO_ID_Avg[i])
+  K_Avg_value <- X3$Z_lm_K_Avg[X3$GO_ID_Avg == GO_term_ID]
+  #get every row with that K_Avg value. The lookup table is the K_Avg value only: using the
+  #whole row of the term as the table also matches rows whose K_Avg happens to equal the
+  #text form of any other cell in that row
+  X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% K_Avg_value,]
+  #if all of those rows list the same genes, keep just the first one
+  if(nrow(X3_Temp2) > 1 && length(unique(X3_Temp2$Genes_Avg)) == 1){
+    X3_Temp2 <- X3_Temp2[1,]
+  }
+  Y_parts[[i]] <- X3_Temp2
+}
+
+#bind once instead of growing Y inside the loop; an empty X3 gives an empty Y
+Y <- if(length(Y_parts) > 0) do.call(rbind,Y_parts) else X3
+#groups that were not collapsed were added once per member, so drop the repeated rows
+Y1 <- unique(Y)
+
+
+write.csv(Y1, file = paste0(outputpath, "Average_GOTerms_All_NonRedundantTerms.csv"), row.names = FALSE)
+#terms whose Z_lm_L_Avg is <= -2 or >= 2; rows with a missing value are dropped afterwards
+Y2 <- Y1[Y1$Z_lm_L_Avg <= -2 | Y1$Z_lm_L_Avg >= 2, ]
+Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg), ]
+write.csv(Y2, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"), row.names = FALSE)
+#of those, only the terms with NumGenes_Avg greater than 2
+Y3 <- Y2[Y2$NumGenes_Avg > 2, ]
+write.csv(Y3, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"), row.names = FALSE)
+#terms whose Z_lm_K_Avg is <= -2 or >= 2; rows with a missing value are dropped afterwards
+Y4 <- Y1[Y1$Z_lm_K_Avg <= -2 | Y1$Z_lm_K_Avg >= 2, ]
+Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg), ]
+write.csv(Y4, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"), row.names = FALSE)
+#of those, only the terms with NumGenes_Avg greater than 2
+Y5 <- Y4[Y4$NumGenes_Avg > 2, ]
+write.csv(Y5, file = paste0(outputpath, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"), row.names = FALSE)