Rollup before parallelization

This commit is contained in:
2024-08-14 23:20:29 -04:00
parent 1ba1f14537
commit 6992d5eec0
8 changed files with 2517 additions and 2434 deletions

View File

@@ -1,44 +1,45 @@
#!/usr/bin/env Rscript
# JoinInteractExps.R
library(plyr)
library(sos)
library(dplyr)
library("plyr")
library("sos")
library("dplyr")
# Parse positional command-line arguments and locate the per-study z-score files.
#   args[1]  output directory           (default "./")
#   args[2]  sd value                   (default 2)
#   args[3]  study-info CSV path        (default "../Code/StudyInfo.csv")
# NOTE(review): in this listing each renamed statement appears twice, once with
# the camelCase name and once with the snake_case name (e.g. outDir / out_dir).
args <- commandArgs(TRUE)
# Set output dir
if (length(args) >= 1) {
outDir <- file.path(args[1])
out_dir <- file.path(args[1])
} else {
outDir <- "./" # for legacy workflow
out_dir <- "./" # for legacy workflow
}
# Set sd value
# commandArgs() yields character strings, so the value is converted with
# as.numeric() before being formatted with "%f" below.
if (length(args) >= 2) {
sd <- args[2]
sd <- as.numeric(args[2])
} else {
sd <- 2 # default value
}
print(paste("SD=", sd))
# Set studyInfo file
sprintf("SD value is: %f", sd)
# Set study_info file
if (length(args) >= 3) {
studyInfo <- file.path(args[3])
study_info <- file.path(args[3])
} else {
studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
}
# NOTE(review): args[3] is already consumed as the study-info path above, so
# this slice begins with that path rather than with the first study. When fewer
# than 3 arguments are given, 3:length(args) counts downward. Confirm the
# intended start index (presumably 4).
studies <- args[3:length(args)]
inputFiles <- c()
input_files <- c()
# Collect the interaction z-score file of each study, if it exists.
# NOTE(review): `study` is the integer index produced by 1:length(studies), not
# an element of `studies`, so file.path() is built from the number itself.
# Presumably studies[study] was intended -- confirm.
# Because results are assigned by index, a study whose file is missing can leave
# an NA gap, so the printed length is not necessarily the number of files found.
for (study in 1:length(studies)) {
zsFile <- file.path(study, "zscores", "zscores_interaction.csv")
if (file.exists(zsFile)) {
inputFiles[study] <- zsFile
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
if (file.exists(zs_file)) {
input_files[study] <- zs_file
}
}
print(length(inputFiles))
print(length(input_files))
# TODO: this would be better handled in a loop, in case more experiments need to be compared.
# The input handling is already designed for this.
@@ -46,38 +47,38 @@ print(length(inputFiles))
# Join the files two at a time, depending on how many input files there are.
# List the larger file first? In this example X2 has the larger number of genes.
# If X1 has a larger number of genes, switch the order of X1 and X2.
if (length(inputFiles) == 2) {
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE)
if (length(input_files) == 2) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column
headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
} else if (length(inputFiles) == 3) {
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
} else if (length(input_files) == 3) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
X <- join(X, X3, by = "OrfRep")
OBH <- X[, order(colnames(X))] #OrderByHeader
OBH <- X[, order(colnames(X))] # OrderByHeader
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1", -"Gene.2")
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
} else if (length(inputFiles) == 4) {
X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
} else if (length(input_files) == 4) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
X <- join(X, X3, by = "OrfRep")
X <- join(X, X4, by = "OrfRep")
OBH <- X[, order(colnames(X))] #OrderByHeader
OBH <- X[, order(colnames(X))] # OrderByHeader
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
@@ -221,13 +222,13 @@ if (std == 0) {
# R places hidden "" around the header names. The following
# is intended to remove those quotes so that the "" do not blow up the Java REMc.
# Use quote = FALSE in the write.csv call to fix the R output file.
# Write the result tables to the output directory and record `std` in the
# study-info table.
# NOTE(review): in this listing each renamed statement appears twice, once with
# the camelCase name and once with the snake_case name (e.g. outDir / out_dir).
# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE)
write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
# Load the study-info table, overwrite its 4th column with the numeric value of
# `std`, then save it both as parameters.csv in the output directory and back
# to the study-info path it was read from.
LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE)
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
print(std)
LabelStd[, 4] <- as.numeric(std)
write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE)
# NOTE(review): this write replaces the input study-info file in place with the
# modified table -- confirm that mutating the input is intended.
write.csv(LabelStd, file = studyInfo, row.names = FALSE)
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
write.csv(LabelStd, file = study_info, row.names = FALSE)