Rollup for java clustering

This commit is contained in:
2024-08-16 17:07:57 -04:00
parent 38b3f66695
commit d1380f9c3b
6032 changed files with 23267 additions and 924 deletions

View File

@@ -55,8 +55,8 @@ print(length(input_files))
# Join the input files two at a time, depending on how many input files were given.
# List the larger file first? In this example X2 has the larger number of genes.
# If X1 has a larger number of genes, switch the order of X1 and X2.
# NOTE(review): this listing appears to show two versions of the same guard back to
# back (the "== 1" test with its message, then the "< 2" test with its message);
# only one "if" header is closed by the brace below -- confirm which one is current.
if (length(input_files) == 1) {
print("Only one experiment to compare, skipping join")
if (length(input_files) < 2) {
# NOTE(review): "Note enough" in the message below looks like a typo for "Not enough".
print("Note enough Exps to compare, skipping join")
# stop() raises an error, so the script halts here when the guard is true.
stop("Exiting script")
}
@@ -65,20 +65,17 @@ if (length(input_files) >= 2) {
# Read the second input file, keeping strings as character vectors (not factors).
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
# Join X1 (read outside the lines shown here) with X2 on the "OrfRep" column.
X <- join(X1, X2, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader: columns sorted by column name
# NOTE(review): the next two groups of statements look like the old and the new
# version of the same selection shown back to back (upper-case patterns assigned
# to headSel, then lower-case patterns assigned to headers) -- confirm which applies.
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1") # remove "Gene.1" column
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift columns
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1" column; frame for interleaving Z_lm with Shift columns
# NOTE(review): the lower-case patterns below presumably rely on contains()
# matching case-insensitively by default -- confirm against the column names.
headers <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
headSel <- select(headers, -"Gene.1") # remove "Gene.1" column
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift columns
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1" column; frame for interleaving Z_lm with Shift columns
}
# With three or more input files, fold the third experiment into the joined table.
if (length(input_files) >= 3) {
# Read the third input file, keeping strings as character vectors (not factors).
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
# Extend the running join X with X3 on the "OrfRep" column.
X <- join(X, X3, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader: columns sorted by column name
# NOTE(review): the statements that build headSel from OBH and the one that builds
# it from headers look like the old and new version of the same step shown together.
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1", -"Gene.2")
# NOTE(review): in the lines visible here, headers is only assigned in the two-file
# block, from the OBH computed before X3 was joined; if it is not rebuilt elsewhere,
# this selection would lack X3's columns (and possibly "Gene.2") -- verify.
headSel <- select(headers, -"Gene.1", -"Gene.2")
# Identifier-only frame: OrfRep and Gene columns, minus the duplicated Gene columns.
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
}
@@ -86,16 +83,15 @@ if (length(input_files) >= 3) {
# With four or more input files, fold the fourth experiment into the joined table.
if (length(input_files) >= 4) {
# Read the fourth input file, keeping strings as character vectors (not factors).
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
# Extend the running join X with X4 on the "OrfRep" column.
X <- join(X, X4, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader: columns sorted by column name
# NOTE(review): the statements that build headSel from OBH and the one that builds
# it from headers look like the old and new version of the same step shown together.
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
# NOTE(review): in the lines visible here, headers is only assigned in the two-file
# block, from the OBH computed before X3/X4 were joined; if it is not rebuilt
# elsewhere, this selection would lack the later experiments' columns -- verify.
headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
# Identifier-only frame: OrfRep and Gene columns, minus the duplicated Gene columns.
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
}
# NOTE(review): at this point headers, where assigned above, is the data frame
# returned by select(), so this prints the whole table rather than its column names.
print(headers)
# headSel$contains("Z_Shift") %>% replace_na(0.001)
# From here on headers holds the column names of headSel (a character vector).
headers <- colnames(headSel)
print(headers)
# NOTE(review): this initialisation is immediately overwritten by the for loop.
i <- 0
# Walk the column names by index; the loop body continues beyond the lines shown.
# NOTE(review): 1:length(headers) yields c(1, 0) when headers is empty;
# seq_along(headers) would avoid that.
for (i in 1:length(headers)) {
# Branch taken for column names that contain "Shift" (case-sensitive match).
if (grepl("Shift", headers[i])) {
@@ -107,8 +103,8 @@ for (i in 1:length(headers)) {
}
# 2SD option: code to exclude Z_lm values less than 2 standard deviations.
# NOTE(review): REMcRdy and shiftOnly are each assigned twice below (upper-case
# patterns, then lower-case patterns); these look like the old and new version of
# the same statements shown together -- confirm which applies.
# REMcRdy: the OrfRep and Gene columns plus the Z_lm columns of headSel.
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_"))
# shiftOnly: the OrfRep and Gene columns plus the Z_Shift columns of headSel.
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
# Code to replace the numeric (.1, .2, .3) headers with experiment names from StudyInfo.txt.
# Labels is read from the path in study_info (defined outside the lines shown here).
Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")