Rollup before parallelization

2024-08-14 23:20:29 -04:00
parent 1ba1f14537
commit 6992d5eec0
8 changed files with 2517 additions and 2434 deletions
--- a/workflow/apps/r/joinInteractExps.R
+++ b/workflow/apps/r/joinInteractExps.R
@@ -1,44 +1,45 @@
 #!/usr/bin/env Rscript
 # JoinInteractExps.R

-library(plyr)
-library(sos)
-library(dplyr)
+library("plyr")
+library("sos")
+library("dplyr")

 args <- commandArgs(TRUE)

 # Set output dir
 if (length(args) >= 1) {
-  outDir <- file.path(args[1])
+  out_dir <- file.path(args[1])
 } else {
-  outDir <- "./" # for legacy workflow
+  out_dir <- "./" # for legacy workflow
 }

 # Set sd value
 if (length(args) >= 2) {
-  sd <- args[2]
+  sd <- as.numeric(args[2])
 } else {
  sd <- 2 # default value
 }
-print(paste("SD=", sd))

-# Set studyInfo file
+sprintf("SD value is: %f", sd)
+
+# Set study_info file
 if (length(args) >= 3) {
-  studyInfo <- file.path(args[3])
+  study_info <- file.path(args[3])
 } else {
-  studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
+  study_info <- "../Code/StudyInfo.csv" # for legacy workflow
 }

 studies <- args[3:length(args)]
-inputFiles <- c()
+input_files <- c()
 for (study in 1:length(studies)) {
-  zsFile <- file.path(study, "zscores", "zscores_interaction.csv")
-  if (file.exists(zsFile)) {
-    inputFiles[study] <- zsFile
+  zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
+  if (file.exists(zs_file)) {
+    input_files[study] <- zs_file
  }
 }

-print(length(inputFiles))
+print(length(input_files))

 # TODO: this would be better handled in a loop, in case more experiments need to be compared.
 # The input is already designed for this.
@@ -46,38 +47,38 @@ print(length(inputFiles))
 # Join the files two at a time, depending on how many input files there are.
 # List the larger file first -- in this example X2 has the larger number of genes.
 # If X1 has a larger number of genes, switch the order of X1 and X2.
-if (length(inputFiles) == 2) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE)
+if (length(input_files) == 2) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
-  headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column
+  headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
-  headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
-} else if (length(inputFiles) == 3) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
-  X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
+  headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
+} else if (length(input_files) == 3) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
+  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  #OrderByHeader
+  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
  headSel <- select(headSel, -"Gene.1", -"Gene.2")
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
  
-} else if (length(inputFiles) == 4) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
-  X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
-  X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
+} else if (length(input_files) == 4) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
+  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
+  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
  X <- join(X, X4, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  #OrderByHeader
+  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
  headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
@@ -221,13 +222,13 @@ if (std == 0) {
 # R places hidden "" around the header names. The following
 # is intended to remove those quotes so that the "" do not blow up the Java REMc.
 # Use quote = FALSE in the write.csv call to fix the R output file.
-# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE)
-write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
-write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
+write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 #LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")

-LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE)
+LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
 print(std)
 LabelStd[, 4] <- as.numeric(std)
-write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE)
-write.csv(LabelStd, file = studyInfo, row.names = FALSE)
+write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
+write.csv(LabelStd, file = study_info, row.names = FALSE)