Squashed initial commit

2024-09-10 13:47:29 -04:00
commit 8ebb6ad265
6221 changed files with 2512206 additions and 0 deletions
--- a/qhtcp-workflow/apps/r/addShiftVals.R
+++ b/qhtcp-workflow/apps/r/addShiftVals.R
@@ -0,0 +1,90 @@
+#!/usr/bin/env Rscript
+# This script will add the shift data to the finalTable.csv file
+#
+# You may want to reorder columns in Excel before making heatmaps; otherwise, all the shift data will be plotted next to each other.
+
+library(plyr)
+library(dplyr)
+library(sos)
+
+# Resolve the four file paths this script works with from the positional
+# command-line arguments, in the order: finalTable CSV, shift CSV, StudyInfo
+# file, output CSV. Each argument is optional.
+# Only trailing arguments can be left out, since they are matched by position.
+args <- commandArgs(TRUE)
+
+# Start from the legacy-workflow defaults ...
+finalTable <- "REMcRdy_lm_only.csv-finalTable.csv"
+shiftFile <- "Shift_only.csv"
+studyInfo <- "../Code/StudyInfo.csv"
+output <- "REMcHeatmaps/REMcWithShift.csv"
+
+# ... then let every argument that was actually supplied replace its default.
+if (length(args) >= 1) {
+  finalTable <- file.path(args[1])
+}
+if (length(args) >= 2) {
+  shiftFile <- file.path(args[2])
+}
+if (length(args) >= 3) {
+  studyInfo <- file.path(args[3])
+}
+if (length(args) >= 4) {
+  output <- file.path(args[4])
+}
+
+# Append the shift columns to the REMc finalTable and interleave them with the
+# Z_lm columns so that each shift column sits directly before its Z_lm partner.
+# Output layout: the first three finalTable columns, the (shift, Z_lm) pairs,
+# then the "cluster." columns. The result is written to `output` as CSV.
+
+# Read in the REMc finalTable data
+X <- read.csv(file = finalTable, header = TRUE, stringsAsFactors = FALSE)
+# Read in the shift data From ../JoinInteractions
+Y <- read.csv(file = shiftFile, header = TRUE, stringsAsFactors = FALSE)
+# NOTE(review): Labels is never used below; the read is kept only so that an
+# unreadable studyInfo file still stops the script. Confirm it can be dropped.
+Labels <- read.delim(studyInfo, skip = 0, as.is = TRUE, row.names = 1, strip.white = TRUE)
+
+# X supplies its first three columns verbatim (column 2 is the match key) and Y
+# holds the key in column 1 and shift values from column 3 on.
+stopifnot(ncol(X) >= 3, ncol(Y) >= 3)
+shiftCols <- 3:ncol(Y)
+shfHdr <- colnames(Y)[shiftCols]
+nShift <- length(shiftCols)
+
+# Match the orf name in each finalTable row to an orf name in the shift file.
+# match() is vectorized, so no row loop is needed and a zero-row finalTable is
+# handled (1:length() would have iterated over c(1, 0)). Rows without a match
+# index Y with NA and therefore receive NA in every shift column.
+shiftRows <- match(X[, 2], Y[, 1])
+shiftTbl <- Y[shiftRows, shiftCols, drop = FALSE]
+rownames(shiftTbl) <- NULL  # discard the row labels carried over from Y
+
+# The shift columns are taken from Y by position rather than re-selected with
+# contains("V"): contains() ignores case by default, so finalTable columns with
+# a "v" in their name would be picked up too and break the pairing below.
+combTbl <- X[, 1:3]
+lmTbl <- select(X, contains("Z_lm"))
+clustTbl <- select(X, contains("cluster."))
+lmHdr <- colnames(lmTbl)
+clstHdr <- colnames(clustTbl)
+
+# Every shift column is paired with the Z_lm column at the same position; any
+# Z_lm columns beyond the number of shift columns are not written.
+if (ncol(lmTbl) < nShift) {
+  stop("finalTable has fewer Z_lm columns than the shift file has shift columns",
+       call. = FALSE)
+}
+
+# Reorder columns to produce an interleaved set of Z_lm and Shift data for all
+# the cpps: shift 1, Z_lm 1, shift 2, Z_lm 2, ...
+pairIdx <- seq_len(nShift)
+paired <- cbind.data.frame(shiftTbl, lmTbl[pairIdx])
+interleave <- as.vector(rbind(pairIdx, nShift + pairIdx))
+intLvHdr <- as.vector(rbind(shfHdr, lmHdr[pairIdx]))
+
+# Assemble the output table, apply the final headers and write it out
+combI <- cbind.data.frame(combTbl, paired[interleave], clustTbl)
+colnames(combI) <- c(colnames(combTbl), intLvHdr, clstHdr)
+write.csv(combI, file = output, row.names = FALSE)