#!/usr/bin/env Rscript
# addShiftVals.R
#
# This script adds the shift data to the finalTable.csv file and writes a
# combined table in which each shift column is placed next to a Z_lm column
# (interleaved), followed by the cluster columns.
#
# Usage:
#   addShiftVals.R [finalTable] [shiftFile] [studyInfo] [output]
# Every argument is optional; an omitted argument falls back to the
# legacy-workflow default path defined below.
#
# May want to reorder columns in Excel before making heatmaps - otherwise all
# the shift data will be plotted next to each other.
library(plyr)
library(dplyr)
library(sos)

# Return the i-th command-line argument as a path, or `default` if absent.
argOrDefault <- function(args, i, default) {
  if (length(args) >= i) {
    file.path(args[i])
  } else {
    default
  }
}

args <- commandArgs(TRUE)
# Defaults are the paths used by the legacy workflow
finalTable <- argOrDefault(args, 1, "REMcRdy_lm_only.csv-finalTable.csv")
shiftFile <- argOrDefault(args, 2, "Shift_only.csv")
studyInfo <- argOrDefault(args, 3, "../Code/StudyInfo.csv")
output <- argOrDefault(args, 4, "REMcHeatmaps/REMcWithShift.csv")

# Read in the REMc finalTable data
X <- read.csv(file = finalTable, header = TRUE, stringsAsFactors = FALSE)
# Read in the shift data (from ../JoinInteractions in the legacy workflow)
Y <- read.csv(file = shiftFile, header = TRUE, stringsAsFactors = FALSE)
# Labels is not used below; the read is kept so that a missing or unreadable
# StudyInfo file still stops the script.
Labels <- read.delim(
  studyInfo,
  skip = 0, as.is = TRUE, row.names = 1, strip.white = TRUE
)

# The first three columns of X are carried over unchanged, and columns 3..end
# of Y are the shift data, so both tables need at least three columns.
# Without this check `3:ncol(Y)` would silently count backwards.
if (ncol(X) < 3) {
  stop("finalTable must have at least 3 columns: ", finalTable, call. = FALSE)
}
if (ncol(Y) < 3) {
  stop("shiftFile must have at least 3 columns: ", shiftFile, call. = FALSE)
}

# Match the orf name in each row of X (column 2) to an orf name in the shift
# data (column 1) and pull the shift columns for that row. Rows of X with no
# match get NA in every shift column.
shiftCols <- 3:ncol(Y)
shiftRows <- match(X[[2]], Y[[1]])
# The shift columns are taken straight from Y by position rather than being
# looked up by name afterwards: a name lookup such as contains("V") is
# case-insensitive and would also pick up unrelated columns.
shiftTbl <- Y[shiftRows, shiftCols, drop = FALSE]
rownames(shiftTbl) <- NULL
shfHdr <- colnames(Y)[shiftCols]

combTbl <- X[, 1:3]
lmTbl <- dplyr::select(X, contains("Z_lm"))
clustTbl <- dplyr::select(X, contains("cluster."))

# Each shift column is paired with the Z_lm column at the same position.
nShift <- ncol(shiftTbl)
if (ncol(lmTbl) < nShift) {
  stop(
    "Found ", ncol(lmTbl), " Z_lm column(s) in ", finalTable, " but ",
    nShift, " shift column(s) in ", shiftFile,
    "; cannot pair them for interleaving.",
    call. = FALSE
  )
}
if (ncol(lmTbl) > nShift) {
  # Only the first nShift Z_lm columns have a partner; the rest are left out.
  warning(
    "Found ", ncol(lmTbl), " Z_lm column(s) but only ", nShift,
    " shift column(s); the extra Z_lm columns are not written.",
    call. = FALSE
  )
}
lmTbl <- lmTbl[seq_len(nShift)]
lmHdr <- colnames(lmTbl)

# Reorder columns to produce an interleaved set of Shift and Z_lm data:
# shift 1, Z_lm 1, shift 2, Z_lm 2, ...
paired <- cbind.data.frame(shiftTbl, lmTbl)
pairedHdr <- c(shfHdr, lmHdr)
interleave <- as.vector(rbind(seq_len(nShift), nShift + seq_len(nShift)))

combI <- cbind.data.frame(combTbl, paired[interleave], clustTbl)
# Give the shift columns the same names as in the shift file
colnames(combI) <- c(
  colnames(combTbl), pairedHdr[interleave], colnames(clustTbl)
)

write.csv(combI, file = output, row.names = FALSE)