Rollup for java clustering

2024-08-16 17:07:57 -04:00
parent 38b3f66695
commit d1380f9c3b
6032 changed files with 23267 additions and 924 deletions
--- a/workflow/apps/r/joinInteractExps.R
+++ b/workflow/apps/r/joinInteractExps.R
@@ -55,8 +55,8 @@ print(length(input_files))
 # Join the two files at a time as a function of how many inputFile
 # list the larger file first ? in this example X2 has the larger number of genes
 # If X1 has a larger number of genes, switch the order of X1 and X2
-if (length(input_files) == 1) {
-  print("Only one experiment to compare, skipping join")
+if (length(input_files) < 2) { # a join needs at least two input files
+  print("Not enough Exps to compare, skipping join")
  stop("Exiting script")
 }

@@ -65,20 +65,17 @@ if (length(input_files) >= 2) {
  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  OBH <- X[, order(colnames(X))]  # OrderByHeader
-  headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
-    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
-  headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
-  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
-  headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
+  headers <- select(OBH, contains("OrfRep"), matches("Gene"),
+    contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l")) # NOTE(review): lowercase patterns presumably rely on contains() ignoring case by default — confirm
+  headSel <- select(headers, -"Gene.1") # remove the "Gene.1" column
+  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift columns
+  headSel2 <- select(headSel2, -"Gene.1") # remove the "Gene.1" column; frame for interleaving Z_lm with Shift columns
 }

 if (length(input_files) >= 3) {
  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
  X <- join(X, X3, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  # OrderByHeader
-  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
-    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
-  headSel <- select(headSel, -"Gene.1", -"Gene.2")
+  headSel <- select(headers, -"Gene.1", -"Gene.2") # NOTE(review): in the lines shown, headers (and OBH) come from the X1/X2 join and are not rebuilt after X3 is joined — confirm "Gene.2" and X3's columns are actually present here
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
 }
@@ -86,16 +83,15 @@ if (length(input_files) >= 3) {
 if (length(input_files) >= 4) {
  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
  X <- join(X, X4, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  # OrderByHeader
-  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
-    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
-  headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
+  headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3") # NOTE(review): in the lines shown, headers (and OBH) come from the X1/X2 join and are not rebuilt after X3/X4 are joined — confirm "Gene.2", "Gene.3" and the later files' columns are actually present here
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
 }

+print(headers) # NOTE(review): headers is still the select() result here (column names are assigned just below), so this prints the whole table — presumably debug output; confirm it should stay
 # headSel$contains("Z_Shift") %>% replace_na(0.001)
 headers <- colnames(headSel)
+print(headers)
 i <- 0
 for (i in 1:length(headers)) {
  if (grepl("Shift", headers[i])) {
@@ -107,8 +103,8 @@ for (i in 1:length(headers)) {
 }

 # 2SD option code to exclude Z_lm values less than 2 standard Deviations
-REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("Z_lm_"))
-shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
+REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_lm_"))
+shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_shift"))

 # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
 Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")