Commit earlier refactoring

2024-07-29 11:44:45 -04:00
parent 29cbce0754
commit 527068e683
294 changed files with 5524008 additions and 0 deletions
--- a/workflow/apps/r/addShiftVals.R
+++ b/workflow/apps/r/addShiftVals.R
@@ -0,0 +1,91 @@
+#!/usr/bin/env Rscript
+# This script will add the shift data to the finalTable.csv file
+#
+# You may want to reorder the columns in Excel before making heatmaps; otherwise all the shift data will be plotted next to each other.
+
+library(plyr)
+library(dplyr)
+library(sos)
+
+# Positional command-line arguments. All are optional; each one falls back to
+# its legacy-workflow default when it is not supplied:
+#   1. finalTable - CSV with the REMc finalTable data
+#   2. shiftFile  - CSV with the shift data
+#   3. studyInfo  - study info file
+#   4. output     - path of the CSV written at the end of the script
+args <- commandArgs(TRUE)
+
+# Argument k is present when length(args) >= k. (Testing with `>` ignored a
+# lone first argument and never honoured the fourth unless a fifth was given.)
+if (length(args) >= 1) {
+  finalTable <- args[1]
+} else {
+  finalTable <- "REMcRdy_lm_only.csv-finalTable.csv"  # for legacy workflow
+}
+
+if (length(args) >= 2) {
+  shiftFile <- args[2]
+} else {
+  shiftFile <- "Shift_only.csv"  # for legacy workflow
+}
+
+if (length(args) >= 3) {
+  studyInfo <- args[3]
+} else {
+  studyInfo <- "../Code/StudyInfo.csv"  # for legacy workflow
+}
+
+if (length(args) >= 4) {
+  output <- args[4]
+} else {
+  output <- "REMcHeatmaps/REMcWithShift.csv"  # for legacy workflow
+}
+
+
+# Read in the REMc finalTable data
+X <- data.frame(read.csv(file = finalTable, header = TRUE,
+                         stringsAsFactors = FALSE))
+# Read in the shift data (legacy workflow: from ../JoinInteractions)
+Y <- data.frame(read.csv(file = shiftFile, header = TRUE,
+                         stringsAsFactors = FALSE))
+# NOTE(review): Labels is not referenced again in the visible script; the read
+# is kept so a missing or unreadable studyInfo file still stops the run.
+Labels <- read.delim(studyInfo, skip = 0, as.is = TRUE, row.names = 1,
+                     strip.white = TRUE)
+
+# Column 1 of the shift file is the match key, column 2 is skipped, and
+# columns 3..ncol(Y) hold the values to append. With fewer than 3 columns the
+# ranges below would count backwards and corrupt the table, so stop early.
+if (ncol(Y) < 3) {
+  stop("shift file must have at least 3 columns: ", shiftFile, call. = FALSE)
+}
+
+# Determine the number of cols - needed to create the correct number of new cols
+Xcolnum <- ncol(X)
+ADDnum <- Xcolnum + ncol(Y) - 2
+shiftCols <- 3:ncol(Y)
+addedCols <- (Xcolnum + 1):ADDnum
+
+# Create the new columns filled with NAs first. R gives these placeholder
+# columns default names of the form V<index>; the values are filled in below.
+Xtemp <- X
+Xtemp[, addedCols] <- NA
+
+# Match the orf name in column 2 of every finalTable row to column 1 of the
+# shift data in a single vectorized match(). Rows without a match get NA and
+# therefore keep NA in every added column.
+shiftRows <- match(X[, 2], Y[, 1])
+Xtemp[, addedCols] <- Y[shiftRows, shiftCols, drop = FALSE]
+# Reorder columns for generating heatmaps. Output layout:
+#   the first three columns of X, then each shift column immediately followed
+#   by the Z_lm column at the same position, then the cluster columns.
+
+# Headers of the shift columns (everything after the first two columns of Y)
+shfHdr <- colnames(Y)[-(1:2)]
+
+combTbl <- X[, 1:3]  # starting template columns
+lmTbl <- select(Xtemp, contains("Z_lm"))
+# The shift data are exactly the columns appended after the original X
+# columns, so take them by position. Selecting them with contains("V") is
+# case-insensitive substring matching and would also pick up any original
+# column whose name happens to contain a "v".
+shiftTbl <- Xtemp[, (Xcolnum + 1):ADDnum, drop = FALSE]
+clustTbl <- select(Xtemp, contains("cluster."))
+
+lmHdr <- colnames(lmTbl)
+clstHdr <- colnames(clustTbl)
+
+# Interleave shift and Z_lm data: (shift 1, Z_lm 1, shift 2, Z_lm 2, ...).
+# NOTE(review): the pairing is purely positional and runs over the number of
+# shift columns; it assumes the Z_lm columns come in the same order and that
+# there are at least as many of them - confirm against the input files.
+pairIdx <- seq_len(ncol(shiftTbl))
+pairTbls <- lapply(pairIdx, function(i) {
+  cbind.data.frame(shiftTbl[i], lmTbl[i])
+})
+
+# Bind everything once instead of growing the data frame inside a loop.
+combI <- do.call(cbind.data.frame,
+                 c(list(combTbl), pairTbls, list(clustTbl)))
+
+# rbind() stacks the two header vectors as rows; reading the matrix back
+# column by column yields shift 1, Z_lm 1, shift 2, Z_lm 2, ...
+intLvHdr <- as.vector(rbind(shfHdr[pairIdx], lmHdr[pairIdx]))
+
+# Give the shift columns the same names as in the shift file
+combIHdr <- c(colnames(combTbl), intLvHdr, clstHdr)
+colnames(combI) <- combIHdr
+write.csv(combI, file = output, row.names = FALSE)