This commit is contained in:
2024-08-13 15:27:53 -04:00
parent 724b292dab
commit f190967383
7 changed files with 367 additions and 298 deletions

View File

@@ -45,7 +45,7 @@ Function <- file.path(outDir, "Function", "FunctionResults.txt")
Process <- file.path(outDir, "Process", "ProcessResults.txt")
# Specify the list of input files (both CSV and TXT)
file_list <- c(Component,Process,Function)
file_list <- c(Component, Process, Function)
# Specify the output file name
output_file <- file.path(outDir, "GTFCombined.xlsx")

View File

@@ -7,7 +7,7 @@ library(plyr)
library(dplyr)
library(sos)
args=commandArgs(TRUE)
args <- commandArgs(TRUE)
if (length(args) >= 1) {
finalTable <- args[1]
@@ -30,62 +30,61 @@ if (length(args) >= 3) {
if (length(args) >= 4) {
output <- args[4]
} else {
output<- "REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
output <- "REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
}
# Read in the REMc finalTable data
X = data.frame(read.csv(file=finalTable,header=TRUE,stringsAsFactors = FALSE))
X <- data.frame(read.csv(file = finalTable, header = TRUE, stringsAsFactors = FALSE))
# Read in the shift data From ../JoinInteractions
Y = data.frame(read.csv(file=shiftFile,header=TRUE,stringsAsFactors = FALSE))
Labels <- read.delim(studyInfo,skip=0,as.is=T,row.names=1,strip.white=TRUE)
Y <- data.frame(read.csv(file = shiftFile, header = TRUE, stringsAsFactors = FALSE))
Labels <- read.delim(studyInfo, skip = 0, as.is = TRUE, row.names = 1, strip.white = TRUE)
# Determine the number of cols - needed to create the correct number of new cols
Xcolnum <- length(X[1,])
ADDnum <- Xcolnum + length(Y[1,]) - 2
Xcolnum <- length(X[1, ])
ADDnum <- Xcolnum + length(Y[1, ]) - 2
# Create new columns filled with NAs to be filled with data
Xtemp= X
Xtemp[,(Xcolnum+1):ADDnum] <- NA
Xtemp <- X
Xtemp[, (Xcolnum + 1):ADDnum] <- NA
# Match the orf names in each row to a orf name in the shift data file and then add the shift data to the finalTable file
shiftTbl <-as.data.frame(matrix(nrow=1,ncol=length(Y)-2)) #the df shiftTbl must be initialized before for loop
shiftTbl < - as.data.frame(matrix(nrow = 1, ncol = length(Y) - 2)) #the df shiftTbl must be initialized before for loop
for(i in 1:length(X[,1])){
Shiftrownum = match(X[i,2],Y[,1])
shiftTbl[i,]= Y[Shiftrownum,3:length(Y[1,])]
Xtemp[i,(Xcolnum+1):ADDnum] <- Y[Shiftrownum,3:length(Y[1,])]
for (i in 1:length(X[, 1])) {
Shiftrownum <- match(X[i, 2], Y[, 1])
shiftTbl[i, ] <- Y[Shiftrownum, 3:length(Y[1, ])]
Xtemp[i, (Xcolnum + 1):ADDnum] <- Y[Shiftrownum, 3:length(Y[1, ])]
}
headerX= colnames(Xtemp)
headerY= colnames(Y)
shfHdr= headerY[3:length(headerY)]
combTbl<- X[,1:3]
lmTbl= select(Xtemp, contains('Z_lm')) #X[,(4:Xcolnum-2)]
shiftTbl<- select(Xtemp, contains('V'))
clustTbl<- select(Xtemp, contains('cluster.'))
headerX <- colnames(Xtemp)
headerY <- colnames(Y)
shfHdr <- headerY[3:length(headerY)]
combTbl <- X[, 1:3]
lmTbl <- select(Xtemp, contains("Z_lm")) #X[,(4:Xcolnum-2)]
shiftTbl <- select(Xtemp, contains("V"))
clustTbl <- select(Xtemp, contains("cluster."))
# Give the new column names the same names as in the shift file
Xcols = colnames(X)
Ycols = colnames(Y)[3:length(Y[1,])]
newCols = c(Xcols[1:Xcolnum],Ycols)
Xcols <- colnames(X)
Ycols <- colnames(Y)[3:length(Y[1, ])]
newCols <- c(Xcols[1:Xcolnum], Ycols)
# Reorder columns for generating heatmaps
combI= combTbl #Starting Template orf, Genename columns
headersRemc<-newCols #colnames(X)
newHeaders= newCols[1:3]
lmHdr= colnames(lmTbl) #newCols[4:(length(Xcols)-2)]
clstHdr= colnames(clustTbl) #select(newCols, contains('cluster.')) #newCols[3+length(lmHdr):2]
combI <- combTbl #Starting Template orf, Genename columns
headersRemc <- newCols #colnames(X)
newHeaders <- newCols[1:3]
lmHdr <- colnames(lmTbl) #newCols[4:(length(Xcols)-2)]
clstHdr <- colnames(clustTbl) #select(newCols, contains('cluster.')) #newCols[3+length(lmHdr):2]
intLvHdr= vector()
intLvHdr <- vector()
#Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
for(i in 1:(length(shiftTbl[1,]))){
combI=cbind.data.frame(combI, shiftTbl[i])
combI=cbind.data.frame(combI, lmTbl[i])
intLvHdrx= c(shfHdr[i],lmHdr[i] )
intLvHdr= c(intLvHdr,intLvHdrx)
for (i in 1:(length(shiftTbl[1, ]))) {
combI <- cbind.data.frame(combI, shiftTbl[i])
combI <- cbind.data.frame(combI, lmTbl[i])
intLvHdrx <- c(shfHdr[i], lmHdr[i])
intLvHdr <- c(intLvHdr, intLvHdrx)
}
combIHdr= c(colnames(combTbl),intLvHdr,clstHdr)
combI=cbind.data.frame(combI, clustTbl)
colnames(combI)= combIHdr
write.csv(combI,file=output, row.names=FALSE)
combIHdr <- c(colnames(combTbl), intLvHdr, clstHdr)
combI <- cbind.data.frame(combI, clustTbl)
colnames(combI) <- combIHdr
write.csv(combI, file = output, row.names = FALSE)

View File

@@ -9,18 +9,18 @@ args <- commandArgs(TRUE)
# Set output dir
if (length(args) >= 1) {
input_finalTable <- args[1]
input_finalTable <- file.path(args[1])
} else {
input_finalTable <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
}
if (length(args) >= 2) {
outDir <- args[2]
outDir <- file.path(args[2])
} else {
outDir <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
}
hmapfile <- data.frame(read.csv(file=input_finalTable,header=TRUE,sep=",",stringsAsFactors = FALSE))
hmapfile <- data.frame(read.csv(file = input_finalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
# set NAs to NA
hmapfile[hmapfile == -100] <- NA
hmapfile[hmapfile == 100] <- NA
@@ -28,19 +28,20 @@ hmapfile[hmapfile == 0.001] <- NA
hmapfile[hmapfile == -0.001] <- NA
# select the number of rows based on the number of genes
num_total_genes <- length(hmapfile[,1])
num_total_genes <- length(hmapfile[, 1])
# Break out the cluster names so each part of the cluster origin can be accessed
# line below removed because it adds to many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# line below removed because it adds to many genes to clusters when going past 1-0-10
# since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
# hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
hmapfile$cluster.origin <- gsub(";", " ;", x = hmapfile$cluster.origin)
hmapfile$cluster.origin <- strsplit(hmapfile$cluster.origin, ";")
#use tail(x,n) for accessing the outward most cluster
clust_rounds <- 0
for(i in 1:num_total_genes){
if(length(hmapfile$cluster.origin[[i]]) > clust_rounds){
for (i in 1:num_total_genes) {
if (length(hmapfile$cluster.origin[[i]]) > clust_rounds) {
clust_rounds <- length(hmapfile$cluster.origin[[i]])
}
}
@@ -49,12 +50,13 @@ unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
unique_clusts <- unique_clusts[unique_clusts != " "]
# Select only the unique cluster names
unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
unique_clusts <- sort(unique(unlist(unique_clusts, use.names = FALSE)), decreasing = FALSE)
num_unique_clusts <- length(unique_clusts)
# Base the color key on a statistical analysis of the L and K data
# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
# scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
# scale() will calculate the mean and standard deviation of the entire vector,
# then "scale" each element by those values by subtracting the mean and dividing by the sd.
# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
# change so that the L data is multiplied to be on the same scale as the K data
@@ -65,12 +67,12 @@ L_MAX <- 0
KcolumnValues <- vector()
LcolumnValues <- vector()
for(i in 4:(length(hmapfile[1,]) - 2)){
if(grepl("_Z_lm_K",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
KcolumnValues <- append(KcolumnValues,i)
for (i in 4:(length(hmapfile[1, ]) - 2)){
if (grepl("_Z_lm_K", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
KcolumnValues <- append(KcolumnValues, i)
}
if(grepl("_Z_lm_L",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
LcolumnValues <- append(LcolumnValues,i)
if (grepl("_Z_lm_L", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
LcolumnValues <- append(LcolumnValues, i)
}
}
@@ -101,38 +103,41 @@ print(KEY_MIN)
print(L_MAX)
# print(L_Multiplier)
colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
colormapbreaks <- c(KEY_MIN, KEY_MIN * (5 / 6), KEY_MIN * (4 / 6), KEY_MIN * (3 / 6), KEY_MIN * (2 / 6),
KEY_MIN * (1 / 6), KEY_MAX * (1 / 6), KEY_MAX * (2 / 6), KEY_MAX * (3 / 6), KEY_MAX * (4 / 6), KEY_MAX * (5 / 6), KEY_MAX)
# print(colormapbreaks)
# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
# However since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
# However since also using this to amend the first part.
# Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
# then could just use these values and create spaces only between interaction values
# possibly could get rid of redundant shift values if we don't want to view these
# could we pool all the shift data/average it?
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=2)
#ev_repeat = rep("white",length(even_columns))
#ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
#middle_col <- (length(hmapfile[1,]) - 5)/2
#ev_repeat[(middle_col/2)] <- "black"
#print(ev_repeat)
if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == TRUE) {
even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 2)
# ev_repeat = rep("white",length(even_columns))
# ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
# middle_col <- (length(hmapfile[1,]) - 5)/2
# ev_repeat[(middle_col/2)] <- "black"
# print(ev_repeat)
}
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == FALSE){
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=1)
if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) {
even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 1)
print("NO SHIFT VALS FOUND")
}
#FOR THIS SCRIPT ONLY (rap tem hu script)
#even_columns <- c(2,5,7,10,12,15,17)
# FOR THIS SCRIPT ONLY (rap tem hu script)
# even_columns <- c(2,5,7,10,12,15,17)
#m <- 0
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1,]) - 2)])
#print(colnames_edit)
for(i in 1:length(colnames_edit)){
if(grepl("Shift",colnames_edit[i],fixed=TRUE) == TRUE){
# m <- 0
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 2)])
# print(colnames_edit)
for (i in 1:length(colnames_edit)) {
if (grepl("Shift", colnames_edit[i], fixed = TRUE) == TRUE) {
colnames_edit[i] <- ""
colnames_edit[i+1] <- gsub(pattern = "_Z_lm_",replacement = " ",x = colnames_edit[i+1])
try(colnames_edit[i+1] <- gsub(pattern = "_",replacement = " ",x = colnames_edit[i+1]))
colnames_edit[i + 1] <- gsub(pattern = "_Z_lm_", replacement = " ", x = colnames_edit[i + 1])
try(colnames_edit[i + 1] <- gsub(pattern = "_", replacement = " ", x = colnames_edit[i + 1]))
# INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
# print(length(unlist(INT_store)))
@@ -140,7 +145,7 @@ for(i in 1:length(colnames_edit)){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
# }
# if(length(unlist(INT_store)) == 3){
#
#
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
# }
# if(length(unlist(INT_store)) == 5){
@@ -149,140 +154,152 @@ for(i in 1:length(colnames_edit)){
# if(length(unlist(INT_store)) == 6){
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
# }
}
}
print(colnames_edit)
#break()
#colnames_edit[5] <- "TEM HLEG K"
#colnames_edit[10] <- "TEM HL K"
#colnames_edit[15] <- "TEM HLEG L"
#colnames_edit[20] <- "TEM HL L"
# break()
# colnames_edit[5] <- "TEM HLEG K"
# colnames_edit[10] <- "TEM HL K"
# colnames_edit[15] <- "TEM HLEG L"
# colnames_edit[20] <- "TEM HL L"
# Create the heatmaps
for(i in 1:num_unique_clusts){
for (i in 1:num_unique_clusts) {
cluster <- unique_clusts[i]
cluster_data <- subset(hmapfile,grepl(cluster,cluster.origin))
cluster_length <- length(cluster_data[,1])
if(cluster_length != 1){
X0 <- as.matrix(cluster_data[,4:(length(hmapfile[1,]) - 2)])
if(cluster_length >= 2001){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=20,width=15)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor= "white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
#cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
#ColSideColors=ev_repeat,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
#abline(v=0.5467,col="black")
cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
cluster_length <- length(cluster_data[, 1])
if (cluster_length != 1) {
X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 2)])
if (cluster_length >= 2001) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 20, width = 15)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132, 132),
xlab = "Type of Media", ylab = "Gene Name",
# cellnote = round(X0,digits = 0), notecex = 0.1, key = TRUE,
keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
# ColSideColors=ev_repeat,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
# abline(v=0.5467,col="black")
dev.off()
}
if(cluster_length >= 201 && cluster_length <= 2000){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=15,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
#abline(v=0.5316,col="black")
if (cluster_length >= 201 && cluster_length <= 2000) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 15, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.1, key = TRUE,
keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
# abline(v=0.5316,col="black")
dev.off()
}
if(cluster_length >= 150 && cluster_length <= 200){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=12,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.2, key=TRUE,
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
if (cluster_length >= 150 && cluster_length <= 200) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.2, key = TRUE,
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
dev.off()
}
if(cluster_length >= 101 && cluster_length <= 149){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,mypath,height=12,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
if (cluster_length >= 101 && cluster_length <= 149) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, mypath, height = 12, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
dev.off()
}
if(cluster_length >= 60 && cluster_length <= 100){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=12,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
breaks=colormapbreaks,symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
if (cluster_length >= 60 && cluster_length <= 100) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 12, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
dev.off()
}
if(cluster_length <= 59 && cluster_length >= 30){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=9,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
if (cluster_length <= 59 && cluster_length >= 30) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 9, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
dev.off()
}
if(cluster_length <= 29){
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
pdf(file=mypath,height=7,width=12)
heatmap.2(x=X0,
Rowv=TRUE, Colv=NA,
distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
#zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
na.color="red", col=brewer.pal(11,"PuOr"),
main=cluster,
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
if (cluster_length <= 29) {
mypath <- file.path(outDir, paste("cluster_", gsub(" ", "", cluster), sep = ""), ".pdf")
pdf(file = mypath, height = 7, width = 12)
heatmap.2(
x = X0,
Rowv = TRUE, Colv = NA,
distfun = dist, hclustfun = hclust,
dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
# zlim=c(-132,132),
xlab = "Type of Media", ylab = "Gene Name",
cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
na.color = "red", col = brewer.pal(11, "PuOr"),
main = cluster,
labRow = as.character(cluster_data$Gene), labCol = colnames_edit
)
dev.off()
}
}
#print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
# print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
}

View File

@@ -20,25 +20,25 @@ library(htmlwidgets)
# Parse arguments
args <- commandArgs(TRUE)
inputFile <- args[1]
inputFile <- file.path(args[1])
# Set output dir
if (length(args) >= 2) {
outDir <- args[2]
outDir <- file.path(args[2])
} else {
outDir <- "/ZScores/" # for legacy workflow
}
# Set StudyInfo file path
if (length(args) >= 3) {
studyInfo <- args[3]
studyInfo <- file.path(args[3])
} else {
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
}
# Set SGDgeneList file path
if (length(args) >= 4) {
SGDgeneList <- args[4]
SGDgeneList <- file.path(args[4])
} else {
SGDgeneList <- "../Code/SGD_features.tab" # for legacy workflow
}
@@ -54,7 +54,7 @@ if (length(args) >= 5) {
}
delBGFactor <- as.numeric(delBGFactor)
if (is.na(delBGFactor)) {
delBGFactor <- 3 # Recommended by Sean
delBGFactor <- 3 # recommended by Sean
}
print(paste("The Standard Deviation Value is:", delBGFactor))
@@ -68,21 +68,25 @@ if (length(args) >= 6) {
}
expNumber <- as.numeric(expNumber)
outDir_QC <- paste(outDir, "QC/", sep = "")
outDir_QC <- file.path(outDir, "QC")
if (!file.exists(outDir)) {
dir.create(file.path(outDir))
dir.create(outDir)
}
if (!file.exists(outDir_QC)) {
dir.create(file.path(outDir_QC))
}
# Capture Exp_ number, use it to Save args[2]{std}to Labels field and then
# write to Labels to studyInfo.txt for future reference
Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE) # sep = ","
Labels[expNumber, 3] <- delBGFactor
write.csv(Labels, file = studyInfo, row.names = FALSE)
options(width = 1000)
ls.str()
# Write delBJFactor to the StudyInfo file
# TODO we probably shouldn't be doing this, need one source of truth
# TODO disabling this for now
# Labels <- read.csv(file = studyInfo, stringsAsFactors = FALSE) # sep = ","
# Labels[expNumber, 3] <- delBGFactor
# write.csv(Labels, file = studyInfo, row.names = FALSE)
# Begin User Data Selection Section
@@ -366,19 +370,19 @@ Raw_l_vs_K_beforeQC <-
ggtitle("Raw L vs K before QC") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_beforeQC.pdf", sep = ""), width = 12, height = 8)
pdf(file.path(outDir_QC, "Raw_L_vs_K_beforeQC.pdf"), width = 12, height = 8)
Raw_l_vs_K_beforeQC
dev.off()
pgg <- ggplotly(Raw_l_vs_K_beforeQC)
plotly_path <- paste(outDir_QC, "Raw_L_vs_K_beforeQC.html", sep = "")
plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_beforeQC.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Set delta background tolerance based on 3 sds from the mean delta background
Delta_Background_Tolerance <- mean(X$Delta_Backgrd) + (delBGFactor * sd(X$Delta_Backgrd))
# Delta_Background_Tolerance <- mean(X$Delta_Backgrd)+(3*sd(X$Delta_Backgrd))
print(paste("Delta_Background_Tolerance is", Delta_Background_Tolerance, sep = " "))
sprintf("Delta_Background_Tolerance is %f", Delta_Background_Tolerance)
Plate_Analysis_Delta_Backgrd <-
ggplot(X, aes(Scan, Delta_Backgrd, color = as.factor(Conc_Num))) +
@@ -417,13 +421,13 @@ X_Delta_Backgrd_above_Tolerance_L_vs_K <-
) +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf", sep = ""), width = 12, height = 8)
pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.pdf"), width = 12, height = 8)
X_Delta_Backgrd_above_Tolerance_L_vs_K
dev.off()
pgg <- ggplotly(X_Delta_Backgrd_above_Tolerance_L_vs_K)
plotly_path <- paste(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html", sep = "")
plotly_path <- file.path(outDir_QC, "Raw_L_vs_K_for_strains_above_deltabackgrd_threshold.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Frequency plot for all data vs. the delta_background
@@ -434,7 +438,7 @@ DeltaBackground_Frequency_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor
DeltaBackground_Bar_Plot <- ggplot(X, aes(Delta_Backgrd, color = as.factor(Conc_Num))) + geom_bar() +
ggtitle("Bar plot for Delta Background by Conc All Data") + theme_publication_legend_right()
pdf(file = paste(outDir_QC, "Frequency_Delta_Background.pdf", sep = ""), width = 12, height = 8)
pdf(file.path(outDir_QC, "Frequency_Delta_Background.pdf"), width = 12, height = 8)
print(DeltaBackground_Frequency_Plot)
print(DeltaBackground_Bar_Plot)
dev.off()
@@ -568,7 +572,7 @@ Plate_Analysis_Delta_Backgrd_Box_afterQC <-
theme_publication()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis.pdf", sep = ""), width = 14, height = 9)
pdf(file.path(outDir_QC, "Plate_Analysis.pdf"), width = 14, height = 9)
Plate_Analysis_L
Plate_Analysis_L_afterQC
Plate_Analysis_K
@@ -582,7 +586,7 @@ Plate_Analysis_Delta_Backgrd_afterQC
dev.off()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_Boxplots.pdf", sep = ""), width = 18, height = 9)
pdf(file.path(outDir_QC, "Plate_Analysis_Boxplots.pdf"), width = 18, height = 9)
Plate_Analysis_L_Box
Plate_Analysis_L_Box_afterQC
Plate_Analysis_K_Box
@@ -713,7 +717,7 @@ Plate_Analysis_Delta_Backgrd_Box_afterQC_Z <-
theme_publication()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_noZeros.pdf", sep = ""), width = 14, height = 9)
pdf(file.path(outDir_QC, "Plate_Analysis_noZeros.pdf"), width = 14, height = 9)
Plate_Analysis_L_afterQC_Z
Plate_Analysis_K_afterQC_Z
Plate_Analysis_r_afterQC_Z
@@ -722,7 +726,7 @@ Plate_Analysis_Delta_Backgrd_afterQC_Z
dev.off()
# Print the plate analysis data before and after QC
pdf(file = paste(outDir_QC, "Plate_Analysis_noZeros_Boxplots.pdf", sep = ""), width = 18, height = 9)
pdf(file.path(outDir_QC, "Plate_Analysis_noZeros_Boxplots.pdf"), width = 18, height = 9)
Plate_Analysis_L_Box_afterQC_Z
Plate_Analysis_K_Box_afterQC_Z
Plate_Analysis_r_Box_afterQC_Z
@@ -768,7 +772,7 @@ X_stats_ALL <- ddply(
)
# print(X_stats_ALL_L)
write.csv(X_stats_ALL, file = paste(outDir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
write.csv(X_stats_ALL, file.path(outDir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
# Part 3 - Generate summary statistics and calculate the max theoretical L value
# Calculate the Z score at each drug conc for each deletion strain
@@ -885,7 +889,7 @@ for (s in Background_Strains) {
se_AUC = sd_AUC / sqrt(N - 1)
)
write.csv(X_stats_BY, file = paste(outDir, "SummaryStats_BackgroundStrains.csv"), row.names = FALSE)
write.csv(X_stats_BY, file.path(outDir, "SummaryStats_BackgroundStrains.csv"), row.names = FALSE)
# Calculate the max theoretical L values
# Only look for max values when K is within 2SD of the ref strain
@@ -964,7 +968,7 @@ for (s in Background_Strains) {
write.csv(
X_stats_BY_L_within_2SD_K,
file = paste(outDir_QC, "Max_Observed_L_Vals_for_spots_within_2SD_K.csv", sep = ""),
file.path(outDir_QC, "Max_Observed_L_Vals_for_spots_within_2SD_K.csv"),
row.names = FALSE
)
@@ -991,13 +995,13 @@ for (s in Background_Strains) {
ggtitle("Raw L vs K for strains falling outside 2SD of the K mean at each conc") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf", sep = ""), width = 10, height = 8)
pdf(file.path(outDir_QC, "Raw_L_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8)
print(Outside_2SD_K_L_vs_K)
dev.off()
pgg <- ggplotly(Outside_2SD_K_L_vs_K)
plotly_path <- paste(outDir_QC, "RawL_vs_K_for_strains_outside_2SD_K.html", sep = "")
plotly_path <- file.path(outDir_QC, "RawL_vs_K_for_strains_outside_2SD_K.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
Outside_2SD_K_delta_background_vs_K <-
@@ -1006,13 +1010,13 @@ for (s in Background_Strains) {
ggtitle("DeltaBackground vs K for strains falling outside 2SD of the K mean at each conc") +
theme_publication_legend_right()
pdf(paste(outDir_QC, "DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf", sep = ""), width = 10, height = 8)
pdf(file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_2SD_outside_mean_K.pdf"), width = 10, height = 8)
Outside_2SD_K_delta_background_vs_K
dev.off()
pgg <- ggplotly(Outside_2SD_K_delta_background_vs_K)
# pgg
plotly_path <- paste(outDir_QC, "DeltaBackground_vs_K_for_strains_outside_2SD_K.html", sep = "")
plotly_path <- file.path(outDir_QC, "DeltaBackground_vs_K_for_strains_outside_2SD_K.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
# Get the background strain mean values at the no drug conc to calculate shift
@@ -1046,7 +1050,7 @@ for (s in Background_Strains) {
X2_temp <- X2[X2$Conc_Num == Concentration, ]
if (Concentration == 0) {
X2_new <- X2_temp
print(paste("Check loop order, conc =", Concentration, sep = " "))
sprintf("Check loop order, conc = %f", Concentration)
}
if (Concentration > 0) {
try(X2_temp[X2_temp$l == 0 & !is.na(X2_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
@@ -1055,7 +1059,7 @@ for (s in Background_Strains) {
# X2_temp[X2_temp$K == 0, ]$K <- X_stats_ALL_K$max[i]
# X2_temp[X2_temp$r == 0, ]$r <- X_stats_ALL_r$max[i]
# X2_temp[X2_temp$AUC == 0, ]$AUC <- X_stats_ALL_AUC$max[i]
print(paste("Check loop order, conc =", Concentration, sep = " "))
sprintf("Check loop order, conc = %f", Concentration)
X2_new <- rbind(X2_new, X2_temp)
}
}
@@ -1075,13 +1079,13 @@ for (s in Background_Strains) {
X2_RF_temp <- X2_RF[X2_RF$Conc_Num == Concentration, ]
if (Concentration == 0) {
X2_RF_new <- X2_RF_temp
print(paste("Check loop order, conc =", Concentration, sep = " "))
sprintf("Check loop order, conc = %f", Concentration)
}
if (Concentration > 0) {
try(X2_RF_temp[X2_RF_temp$l == 0 & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
try(X2_temp[X2_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_temp$l), ]$SM <- 1)
try(X2_RF_temp[X2_RF_temp$l >= X_stats_BY_L_within_2SD_K$max[i] & !is.na(X2_RF_temp$l), ]$l <- X_stats_BY_L_within_2SD_K$max[i])
print(paste("If error, refs have no L values outside theoretical max L, for REFs, conc =", Concentration, sep = " "))
sprintf("If error, refs have no L values outside theoretical max L, for REFs, conc = %f", Concentration)
X2_RF_new <- rbind(X2_RF_new, X2_RF_temp)
}
}
@@ -1446,7 +1450,7 @@ for (s in Background_Strains) {
InteractionScores_RF$Z_lm_AUC <- (InteractionScores_RF$lm_Score_AUC - lm_mean_AUC) / (lm_sd_AUC)
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$Z_lm_L, decreasing = TRUE), ]
InteractionScores_RF <- InteractionScores_RF[order(InteractionScores_RF$NG, decreasing = TRUE), ]
write.csv(InteractionScores_RF, paste(outDir, "RF_ZScores_Interaction.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores_RF, file.path(outDir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
for (i in 1:num_genes_RF) {
Gene_Sel <- unique(InteractionScores_RF$OrfRep)[i]
@@ -1548,7 +1552,7 @@ for (s in Background_Strains) {
}
}
print("Pass RF ggplot loop")
write.csv(X_stats_interaction_ALL_RF_final, paste(outDir, "RF_ZScore_Calculations.csv", sep = ""), row.names = FALSE)
write.csv(X_stats_interaction_ALL_RF_final, file.path(outDir, "RF_ZScore_Calculations.csv"), row.names = FALSE)
# Part 5 - Get Zscores for Gene deletion strains
@@ -1904,7 +1908,7 @@ for (s in Background_Strains) {
InteractionScores <- InteractionScores[order(InteractionScores$NG, decreasing = TRUE), ]
df_order_by_OrfRep <- unique(InteractionScores$OrfRep)
# X_stats_interaction_ALL <- X_stats_interaction_ALL[order(X_stats_interaction_ALL$NG, decreasing = TRUE), ]
write.csv(InteractionScores, paste(outDir, "ZScores_Interaction.csv", sep = ""), row.names = FALSE)
write.csv(InteractionScores, file.path(outDir, "ZScores_Interaction.csv"), row.names = FALSE)
InteractionScores_deletion_enhancers_L <-
InteractionScores[InteractionScores$Avg_Zscore_L >= 2, ]
@@ -1957,25 +1961,25 @@ for (s in Background_Strains) {
InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K[
!is.na(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K$OrfRep), ]
write.csv(InteractionScores_deletion_enhancers_L,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_L.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_K.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_L.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_K.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_L,
paste(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_L,
paste(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_lm_Suppressors_AvgZscore_K,
paste(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_Avg_Zscore_Suppressors_lm_K,
paste(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv"), row.names = FALSE)
# Get enhancers and suppressors for linear regression
InteractionScores_deletion_enhancers_L_lm <-
@@ -2010,19 +2014,19 @@ for (s in Background_Strains) {
!is.na(InteractionScores_deletion_enhancers_and_Suppressors_K_lm$OrfRep), ]
write.csv(InteractionScores_deletion_enhancers_L_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_K_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_L_lm,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_suppressors_K_lm,
paste(outDir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_L_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv", sep = ""), row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv"), row.names = FALSE)
write.csv(InteractionScores_deletion_enhancers_and_Suppressors_K_lm,
paste(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv", sep = ""), row.names = FALSE)
write.csv(Labels, file = paste(studyInfo), row.names = FALSE)
# write.table(Labels, file = paste("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE)
file.path(outDir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv"), row.names = FALSE)
# write.csv(Labels, studyInfo, row.names = FALSE)
# write.table(Labels, file.path("../Code/StudyInfo.txt"), sep = "\t", row.names = FALSE)
for (i in 1:num_genes) {
Gene_Sel <- unique(InteractionScores$OrfRep)[i]
@@ -2113,11 +2117,11 @@ for (s in Background_Strains) {
}
}
print("Pass Int ggplot loop")
write.csv(X_stats_interaction_ALL_final, paste(outDir, "ZScore_Calculations.csv", sep = ""), row.names = FALSE)
write.csv(X_stats_interaction_ALL_final, file.path(outDir, "ZScore_Calculations.csv"), row.names = FALSE)
Blank <- ggplot(X2_RF) + geom_blank()
pdf(paste(outDir, "InteractionPlots.pdf", sep = ""), width = 16, height = 16, onefile = TRUE)
pdf(file.path(outDir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(
X2_RF,
@@ -2344,7 +2348,7 @@ for (s in Background_Strains) {
dev.off()
pdf(paste(outDir, "RF_InteractionPlots.pdf", sep = ""), width = 16, height = 16, onefile = TRUE)
pdf(file.path(outDir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
X_stats_X2_RF <- ddply(
X2_RF,
@@ -2732,7 +2736,7 @@ for (s in Background_Strains) {
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
pdf(file.path(outDir, "RankPlots.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD,
@@ -2866,7 +2870,7 @@ for (s in Background_Strains) {
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_lm.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
pdf(file.path(outDir, "RankPlots_lm.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD_lm,
@@ -2916,7 +2920,7 @@ for (s in Background_Strains) {
get_lm_AUC <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Avg_Zscore_AUC)
AUC_lm <- summary(get_lm_AUC)
pdf(paste(outDir, "Avg_Zscore_vs_lm_NA_rm.pdf", sep = ""), width = 16, height = 12, onefile = TRUE)
pdf(file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE)
print(ggplot(X_NArm, aes(Avg_Zscore_L, Z_lm_L)) +
geom_point(aes(color = Overlap), shape = 3) +
@@ -2955,7 +2959,7 @@ for (s in Background_Strains) {
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(L_lm$r.squared, 2))) + theme_publication_legend_right()
pgg <- ggplotly(lm_v_Zscore_L)
plotly_path <- paste(outDir, "Avg_Zscore_vs_lm_NA_rm.html", sep = "")
plotly_path <- file.path(outDir, "Avg_Zscore_vs_lm_NA_rm.html")
saveWidget(pgg, file = plotly_path, selfcontained = TRUE)
X_NArm$L_Rank <- rank(X_NArm$Avg_Zscore_L)
@@ -2978,7 +2982,7 @@ for (s in Background_Strains) {
AUC_lm2 <- summary(get_lm_AUC2)
num_genes_NArm2 <- (dim(X_NArm)[1]) / 2
pdf(paste(outDir, "Avg_Zscore_vs_lm_ranked_NA_rm.pdf", sep = ""), width = 16, height = 12, onefile = TRUE)
pdf(file.path(outDir, "Avg_Zscore_vs_lm_ranked_NA_rm.pdf"), width = 16, height = 12, onefile = TRUE)
print(
ggplot(X_NArm, aes(L_Rank, L_Rank_lm)) +
@@ -3114,7 +3118,7 @@ for (s in Background_Strains) {
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_naRM.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
pdf(file.path(outDir, "RankPlots_naRM.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD,
@@ -3234,7 +3238,7 @@ for (s in Background_Strains) {
geom_hline(yintercept = c(-3, 3)) + geom_point(size = 0.1, shape = 3) +
theme_publication()
pdf(paste(outDir, "RankPlots_lm_naRM.pdf", sep = ""), width = 18, height = 12, onefile = TRUE)
pdf(file.path(outDir, "RankPlots_lm_naRM.pdf"), width = 18, height = 12, onefile = TRUE)
grid.arrange(
Rank_L_1SD_lm,
@@ -3273,7 +3277,7 @@ L_lm_5 <- summary(get_lm_5)
get_lm_6 <- lm(X_NArm$Z_lm_AUC ~ X_NArm$Z_lm_r)
L_lm_6 <- summary(get_lm_6)
pdf(file = paste(outDir, "Correlation_CPPs.pdf", sep = ""), width = 10, height = 7, onefile = TRUE)
pdf(file.path(outDir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE)
ggplot(X_NArm, aes(Z_lm_L, Z_lm_K)) +
geom_point(shape = 3, color = "gray70") +
@@ -3482,5 +3486,5 @@ ggplot(X_NArm, aes(Z_lm_r, Z_lm_AUC)) +
dev.off()
# write.csv(Labels, file = paste("../Code/Parameters.csv"), row.names = FALSE)
# write.csv(Labels, file.path("../Code/Parameters.csv"), row.names = FALSE)
timestamp()