Files
hartman-server/workflow/apps/r/addShiftVals.R

92 sor
2.9 KiB
R

#!/usr/bin/env Rscript
# This script will add the shift data to the finalTable.csv file
#
# May want to reorder columns in excel before making heatmaps - otherwise all the shift data will be plotted next to each other.
library(plyr)
library(dplyr)
library(sos)
# Resolve the script's four positional command-line arguments, falling back to
# the legacy-workflow file names when an argument is not supplied.
#
#   1. finalTable - REMc final table (CSV) to which shift values are added
#   2. shiftFile  - CSV holding the shift values
#   3. studyInfo  - study information file
#   4. output     - path of the CSV written by this script

# Return args[index] when at least `index` arguments were supplied, otherwise
# `default`. The previous checks used `length(args) > index`, so the last
# supplied argument was always ignored (e.g. passing exactly four arguments
# never set `output`).
argOrDefault <- function(args, index, default) {
  if (length(args) >= index) {
    args[index]
  } else {
    default
  }
}

args <- commandArgs(TRUE)
finalTable <- argOrDefault(args, 1L, "REMcRdy_lm_only.csv-finalTable.csv") # for legacy workflow
shiftFile <- argOrDefault(args, 2L, "Shift_only.csv") # for legacy workflow
studyInfo <- argOrDefault(args, 3L, "../Code/StudyInfo.csv") # for legacy workflow
output <- argOrDefault(args, 4L, "REMcHeatmaps/REMcWithShift.csv") # for legacy workflow
# Load the input tables. Character columns are kept as plain strings
# (no factor conversion).

# X: the REMc finalTable data.
X <- read.csv(finalTable, header = TRUE, stringsAsFactors = FALSE)

# Y: the shift data (produced by ../JoinInteractions).
Y <- read.csv(shiftFile, header = TRUE, stringsAsFactors = FALSE)

# Labels: study information, with the first column used as row names.
# NOTE(review): read.delim() splits on tabs by default, yet the legacy default
# path ends in ".csv" -- confirm the file really is tab-delimited.
Labels <- read.delim(
  studyInfo,
  skip = 0,
  as.is = TRUE,
  row.names = 1,
  strip.white = TRUE
)
# Add the shift values to the REMc table, interleave them with the Z_lm
# columns, and write the combined table to `output`.
#
# Uses (created earlier in this script):
#   X      - REMc final table. Column 2 is the ORF name used as the lookup
#            key; columns 1-3 are carried through unchanged.
#   Y      - shift table. Column 1 is the ORF name; columns 3..ncol(Y) are the
#            shift values (column 2 is not used).
#   output - path of the CSV file to write.
#
# Output column order:
#   X[, 1:3], then (shift_1, Z_lm_1), (shift_2, Z_lm_2), ..., then every
#   column of X whose name contains "cluster.".
# Shift columns keep the names they have in the shift file.

if (ncol(X) < 3L) {
  stop("finalTable must have at least 3 columns; found ", ncol(X),
       call. = FALSE)
}
if (ncol(Y) < 3L) {
  stop("shiftFile must have at least one shift column after its first two ",
       "columns; found ", ncol(Y), " column(s) in total", call. = FALSE)
}

# Look up every ORF of X in the shift table with one vectorised match().
# ORFs that are absent from Y give NA, which indexes an all-NA row, so those
# rows receive NA shift values. When an ORF occurs more than once in Y the
# first occurrence is used.
shiftCols <- 3:ncol(Y)
matchedRows <- match(X[[2]], Y[[1]])
shiftTbl <- Y[matchedRows, shiftCols, drop = FALSE]
rownames(shiftTbl) <- NULL
shfHdr <- colnames(Y)[shiftCols]

# Pick the column groups out of X by name. contains() is a literal,
# case-insensitive substring match. The shift columns are deliberately NOT
# recovered by name: they were previously appended under auto-generated "V<n>"
# names and re-selected with contains('V'), which also captured any finalTable
# column whose name contains the letter "v" and so misaligned the pairing.
combTbl <- X[, 1:3]
lmTbl <- dplyr::select(X, dplyr::contains("Z_lm"))
clustTbl <- dplyr::select(X, dplyr::contains("cluster."))
lmHdr <- colnames(lmTbl)
clstHdr <- colnames(clustTbl)

# Shift and Z_lm columns are paired by position, so there must be a Z_lm
# column for every shift column.
nPairs <- ncol(shiftTbl)
if (ncol(lmTbl) < nPairs) {
  stop("found ", ncol(lmTbl), " Z_lm column(s) in finalTable but ", nPairs,
       " shift column(s) in shiftFile; cannot pair them", call. = FALSE)
}
if (ncol(lmTbl) > nPairs) {
  # Surplus Z_lm columns have always been left out of the output; say so.
  warning("finalTable has ", ncol(lmTbl), " Z_lm column(s) but shiftFile has ",
          "only ", nPairs, " shift column(s); the last ",
          ncol(lmTbl) - nPairs, " Z_lm column(s) are not written",
          call. = FALSE)
}

# Interleave by position: rbind() stacks the shift indices over the Z_lm
# indices and as.vector() reads the result column-wise, giving
# shift_1, lm_1, shift_2, lm_2, ...
pairedTbl <- cbind.data.frame(shiftTbl, lmTbl[seq_len(nPairs)])
pairOrder <- as.vector(rbind(seq_len(nPairs), nPairs + seq_len(nPairs)))
intLvHdr <- as.vector(rbind(shfHdr, lmHdr[seq_len(nPairs)]))

combI <- cbind.data.frame(combTbl, pairedTbl[pairOrder], clustTbl)
# Names are assigned last so that duplicate headers survive unchanged.
colnames(combI) <- c(colnames(combTbl), intLvHdr, clstHdr)

write.csv(combI, file = output, row.names = FALSE)