Rollup before parallelization
This commit is contained in:
@@ -1,44 +1,45 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# JoinInteractExps.R
|
||||
|
||||
library(plyr)
|
||||
library(sos)
|
||||
library(dplyr)
|
||||
library("plyr")
|
||||
library("sos")
|
||||
library("dplyr")
|
||||
|
||||
# Read the trailing command-line arguments (commandArgs(TRUE) omits R's own options).
# Positional layout used below: [1] output directory, [2] SD value, [3] study-info file.
args <- commandArgs(TRUE)
|
||||
|
||||
# Set output dir: first argument, or "./" when no argument is supplied
|
||||
if (length(args) >= 1) {
|
||||
outDir <- file.path(args[1])
|
||||
out_dir <- file.path(args[1])
|
||||
} else {
|
||||
outDir <- "./" # for legacy workflow
|
||||
out_dir <- "./" # for legacy workflow
|
||||
}
|
||||
|
||||
# Set sd value: second argument, or 2 when it is not supplied
|
||||
if (length(args) >= 2) {
|
||||
sd <- args[2]
|
||||
|
||||
# Command-line arguments arrive as character strings; convert so sd is numeric
||||
sd <- as.numeric(args[2])
|
||||
} else {
|
||||
sd <- 2 # default value
|
||||
}
|
||||
print(paste("SD=", sd))
|
||||
|
||||
# Set studyInfo file
|
||||
# The %f format requires a numeric sd (sprintf rejects a character value for %f)
sprintf("SD value is: %f", sd)
|
||||
|
||||
# Set study_info file: third argument, or the legacy relative path when absent
|
||||
if (length(args) >= 3) {
|
||||
studyInfo <- file.path(args[3])
|
||||
study_info <- file.path(args[3])
|
||||
} else {
|
||||
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
|
||||
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
|
||||
}
|
||||
|
||||
# Collect the z-score interaction file of every study named on the command line.
# NOTE(review): the slice starts at args[3], which is also the element the
# study-info path is read from earlier in this script, so that path is treated
# as the first study as well -- confirm whether args[4:...] was intended.
# NOTE(review): with fewer than three arguments, 3:length(args) counts
# downward (e.g. 3:2), so the slice does not come out empty.
studies <- args[3:length(args)]
|
||||
inputFiles <- c()
|
||||
input_files <- c()
|
||||
# `study` takes the integer positions 1..length(studies), not the study names.
for (study in 1:length(studies)) {
|
||||
zsFile <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||
if (file.exists(zsFile)) {
|
||||
inputFiles[study] <- zsFile
|
||||
# NOTE(review): because `study` is a number, this path begins with that
# number (e.g. "1/zscores/zscores_interaction.csv"); looks like
# studies[study] was meant as the directory -- confirm.
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||
if (file.exists(zs_file)) {
|
||||
# Assigning by position leaves NA entries for earlier studies whose file is missing.
input_files[study] <- zs_file
|
||||
}
|
||||
}
|
||||
|
||||
print(length(inputFiles))
|
||||
# The printed length counts any NA placeholders, not only the files that were found.
print(length(input_files))
|
||||
|
||||
# TODO this is better handled in a loop in case you want to compare more experiments?
|
||||
# The input is already designed for this
|
||||
@@ -46,38 +47,38 @@ print(length(inputFiles))
|
||||
# Join the files two at a time, depending on how many input files there are
|
||||
# List the larger file first? In this example X2 has the larger number of genes
|
||||
# If X1 has a larger number of genes, switch the order of X1 and X2
|
||||
if (length(inputFiles) == 2) {
|
||||
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE)
|
||||
if (length(input_files) == 2) {
|
||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
|
||||
X <- join(X1, X2, by = "OrfRep")
|
||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
|
||||
headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column
|
||||
headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
|
||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
|
||||
headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
||||
} else if (length(inputFiles) == 3) {
|
||||
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
||||
} else if (length(input_files) == 3) {
|
||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
|
||||
X <- join(X1, X2, by = "OrfRep")
|
||||
X <- join(X, X3, by = "OrfRep")
|
||||
OBH <- X[, order(colnames(X))] #OrderByHeader
|
||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
|
||||
headSel <- select(headSel, -"Gene.1", -"Gene.2")
|
||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
||||
|
||||
} else if (length(inputFiles) == 4) {
|
||||
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
|
||||
X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
|
||||
} else if (length(input_files) == 4) {
|
||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
|
||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
|
||||
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
|
||||
X <- join(X1, X2, by = "OrfRep")
|
||||
X <- join(X, X3, by = "OrfRep")
|
||||
X <- join(X, X4, by = "OrfRep")
|
||||
OBH <- X[, order(colnames(X))] #OrderByHeader
|
||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
|
||||
headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||
@@ -221,13 +222,13 @@ if (std == 0) {
|
||||
# R places hidden "" around the header names. The following
|
||||
# is intended to remove those quotes so that the "" do not blow up the Java REMc.
|
||||
# Use ,quote=F in the write.csv statement to fix R output file.
|
||||
# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE)
|
||||
# Write the REMcRdy and shiftOnly tables as unquoted CSV files in the output directory.
write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
|
||||
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
||||
|
||||
LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE)
|
||||
# Load the study-info table so its fourth column can be overwritten below.
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
||||
# NOTE(review): `std` is defined outside the lines visible here; presumably it
# is derived from the `sd` argument -- confirm.
print(std)
|
||||
# Every row of column 4 receives the same numeric value.
LabelStd[, 4] <- as.numeric(std)
|
||||
write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE)
|
||||
write.csv(LabelStd, file = studyInfo, row.names = FALSE)
|
||||
# The updated table is saved twice: as parameters.csv in the output directory...
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
||||
# ...and back over the study-info file it was read from (the input file is rewritten in place).
write.csv(LabelStd, file = study_info, row.names = FALSE)
|
||||
|
||||
Reference in New Issue
Block a user