Commit earlier refactoring
This commit is contained in:
288
workflow/apps/r/createHeatMaps.R
Normal file
288
workflow/apps/r/createHeatMaps.R
Normal file
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# This script will make heatmaps for the REMc analysis
|
||||
# need to give the input "finalTable.csv" file after running REMc generated by eclipse
|
||||
|
||||
library(RColorBrewer)
|
||||
library(gplots)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
# Set output dir
|
||||
if (length(args) > 1) {
|
||||
input_finalTable <- args[1]
|
||||
} else {
|
||||
input_finalTable <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
|
||||
}
|
||||
|
||||
if (length(args) > 2) {
|
||||
outDir <- args[2]
|
||||
} else {
|
||||
outDir <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
|
||||
}
|
||||
|
||||
hmapfile <- data.frame(read.csv(file=input_finalTable,header=TRUE,sep=",",stringsAsFactors = FALSE))
|
||||
# set NAs to NA
|
||||
hmapfile[hmapfile == -100] <- NA
|
||||
hmapfile[hmapfile == 100] <- NA
|
||||
hmapfile[hmapfile == 0.001] <- NA
|
||||
hmapfile[hmapfile == -0.001] <- NA
|
||||
|
||||
# select the number of rows based on the number of genes
|
||||
num_total_genes <- length(hmapfile[,1])
|
||||
|
||||
# Break out the cluster names so each part of the cluster origin can be accessed
|
||||
# line below removed because it adds to many genes to clusters when going past 1-0-10 since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
|
||||
# hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
|
||||
|
||||
hmapfile$cluster.origin = gsub(";"," ;",x=hmapfile$cluster.origin)
|
||||
hmapfile$cluster.origin = strsplit(hmapfile$cluster.origin,';')
|
||||
#use tail(x,n) for accessing the outward most cluster
|
||||
|
||||
clust_rounds <- 0
|
||||
for(i in 1:num_total_genes){
|
||||
if(length(hmapfile$cluster.origin[[i]]) > clust_rounds){
|
||||
clust_rounds <- length(hmapfile$cluster.origin[[i]])
|
||||
}
|
||||
}
|
||||
|
||||
unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
|
||||
unique_clusts <- unique_clusts[unique_clusts != " "]
|
||||
|
||||
# Select only the unique cluster names
|
||||
unique_clusts <- sort(unique(unlist(unique_clusts,use.names= FALSE)),decreasing=FALSE)
|
||||
num_unique_clusts <- length(unique_clusts)
|
||||
|
||||
# Base the color key on a statistical analysis of the L and K data
|
||||
# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
|
||||
# scale() will calculate the mean and standard deviation of the entire vector, then "scale" each element by those values by subtracting the mean and dividing by the sd.
|
||||
# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
|
||||
# change so that the L data is multiplied to be on the same scale as the K data
|
||||
|
||||
KEY_MIN <- 0
|
||||
KEY_MAX <- 0
|
||||
K_MIN <- 0
|
||||
L_MAX <- 0
|
||||
KcolumnValues <- vector()
|
||||
LcolumnValues <- vector()
|
||||
|
||||
for(i in 4:(length(hmapfile[1,]) - 2)){
|
||||
if(grepl("_Z_lm_K",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
|
||||
KcolumnValues <- append(KcolumnValues,i)
|
||||
}
|
||||
if(grepl("_Z_lm_L",colnames(hmapfile)[i],fixed=TRUE) == TRUE){
|
||||
LcolumnValues <- append(LcolumnValues,i)
|
||||
}
|
||||
}
|
||||
|
||||
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
|
||||
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
|
||||
# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
|
||||
# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]
|
||||
|
||||
# Z scores are
|
||||
L_MAX <- 12
|
||||
K_MIN <- -12
|
||||
|
||||
# L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
|
||||
# hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
|
||||
|
||||
# if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
|
||||
# print("FOUND SHIFT VALUES")
|
||||
# hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
|
||||
# }
|
||||
|
||||
# KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
|
||||
# KEY_MIN <- as.numeric(K_MIN)
|
||||
|
||||
KEY_MAX <- as.numeric(L_MAX)
|
||||
KEY_MIN <- as.numeric(K_MIN)
|
||||
|
||||
print(KEY_MIN)
|
||||
print(L_MAX)
|
||||
# print(L_Multiplier)
|
||||
|
||||
colormapbreaks <- c(KEY_MIN,KEY_MIN*(5/6),KEY_MIN*(4/6),KEY_MIN*(3/6),KEY_MIN*(2/6),KEY_MIN*(1/6),KEY_MAX*(1/6),KEY_MAX*(2/6),KEY_MAX*(3/6),KEY_MAX*(4/6),KEY_MAX*(5/6),KEY_MAX)
|
||||
# print(colormapbreaks)
|
||||
|
||||
# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
|
||||
# However since also using this to amend the first part. Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
|
||||
# then could just use these values and create spaces only between interaction values - possibly could get rid of redundant shift values if we don't want to view these
|
||||
# could we pool all the shift data/average it?
|
||||
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
|
||||
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=2)
|
||||
#ev_repeat = rep("white",length(even_columns))
|
||||
#ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
|
||||
#middle_col <- (length(hmapfile[1,]) - 5)/2
|
||||
#ev_repeat[(middle_col/2)] <- "black"
|
||||
#print(ev_repeat)
|
||||
}
|
||||
|
||||
if(grepl("Shift",colnames(hmapfile)[4],fixed=TRUE) == FALSE){
|
||||
even_columns <- seq(from= 2, to= (length(hmapfile[1,]) - 7),by=1)
|
||||
print("NO SHIFT VALS FOUND")
|
||||
}
|
||||
|
||||
#FOR THIS SCRIPT ONLY (rap tem hu script)
|
||||
#even_columns <- c(2,5,7,10,12,15,17)
|
||||
|
||||
#m <- 0
|
||||
colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1,]) - 2)])
|
||||
#print(colnames_edit)
|
||||
for(i in 1:length(colnames_edit)){
|
||||
if(grepl("Shift",colnames_edit[i],fixed=TRUE) == TRUE){
|
||||
colnames_edit[i] <- ""
|
||||
colnames_edit[i+1] <- gsub(pattern = "_Z_lm_",replacement = " ",x = colnames_edit[i+1])
|
||||
try(colnames_edit[i+1] <- gsub(pattern = "_",replacement = " ",x = colnames_edit[i+1]))
|
||||
|
||||
# INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
|
||||
# print(length(unlist(INT_store)))
|
||||
# if(length(unlist(INT_store)) == 4){
|
||||
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
|
||||
# }
|
||||
# if(length(unlist(INT_store)) == 3){
|
||||
#
|
||||
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
|
||||
# }
|
||||
# if(length(unlist(INT_store)) == 5){
|
||||
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep=" ")
|
||||
# }
|
||||
# if(length(unlist(INT_store)) == 6){
|
||||
# colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
|
||||
# }
|
||||
|
||||
}
|
||||
}
|
||||
print(colnames_edit)
|
||||
#break()
|
||||
#colnames_edit[5] <- "TEM HLEG K"
|
||||
#colnames_edit[10] <- "TEM HL K"
|
||||
#colnames_edit[15] <- "TEM HLEG L"
|
||||
#colnames_edit[20] <- "TEM HL L"
|
||||
|
||||
|
||||
# Create the heatmaps
|
||||
for(i in 1:num_unique_clusts){
|
||||
cluster <- unique_clusts[i]
|
||||
cluster_data <- subset(hmapfile,grepl(cluster,cluster.origin))
|
||||
cluster_length <- length(cluster_data[,1])
|
||||
if(cluster_length != 1){
|
||||
X0 <- as.matrix(cluster_data[,4:(length(hmapfile[1,]) - 2)])
|
||||
if(cluster_length >= 2001){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=20,width=15)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor= "white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
#cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
|
||||
keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
#ColSideColors=ev_repeat,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
#abline(v=0.5467,col="black")
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length >= 201 && cluster_length <= 2000){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=15,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.1, key=TRUE,
|
||||
keysize=0.7, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
#abline(v=0.5316,col="black")
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length >= 150 && cluster_length <= 200){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=12,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.2, key=TRUE,
|
||||
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length >= 101 && cluster_length <= 149){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,mypath,height=12,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
|
||||
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length >= 60 && cluster_length <= 100){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=12,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
|
||||
breaks=colormapbreaks,symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.3, key=TRUE,
|
||||
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length <= 59 && cluster_length >= 30){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=9,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA, distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
|
||||
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
dev.off()
|
||||
}
|
||||
if(cluster_length <= 29){
|
||||
mypath = file.path(outDir,paste("cluster_",gsub(" ","",cluster), ".pdf",sep=""))
|
||||
pdf(file=mypath,height=7,width=12)
|
||||
heatmap.2(x=X0,
|
||||
Rowv=TRUE, Colv=NA,
|
||||
distfun = dist, hclustfun = hclust,
|
||||
dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
|
||||
breaks=colormapbreaks, symbreaks=FALSE, colsep = even_columns, sepcolor="white", offsetCol = 0.1,
|
||||
#zlim=c(-132,132),
|
||||
xlab = "Type of Media", ylab = "Gene Name",
|
||||
cellnote = round(X0,digits=0), notecex = 0.4, key=TRUE,
|
||||
keysize=1, trace="none", density.info=c("none"), margins=c(10, 8),
|
||||
na.color="red", col=brewer.pal(11,"PuOr"),
|
||||
main=cluster,
|
||||
labRow=as.character(cluster_data$Gene), labCol=colnames_edit)
|
||||
dev.off()
|
||||
}
|
||||
}
|
||||
#print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
|
||||
}
|
||||
Reference in New Issue
Block a user