Rollup before parallelization

2024-08-14 23:20:29 -04:00
parent 1ba1f14537
commit 6992d5eec0
8 changed files with 2517 additions and 2434 deletions
--- a/workflow/apps/r/TSHeatmaps5dev2.R
+++ b/workflow/apps/r/TSHeatmaps5dev2.R
@@ -8,17 +8,16 @@
 # @arg $2 string gene_ontology_edit.obo file
 # @arg $3 string go_terms.tab file
 # @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
-# @arg $5 string ZScores_interaction.csv
-# @arg $6 string base directory
-# @arg $7 string output directory
+# @arg $5 string base directory
+# @arg $6 string output directory

 library("ontologyIndex")
 library("ggplot2")
 library("RColorBrewer")
 library("grid")
 library("ggthemes")
-#library("plotly")
-#library("htmlwidgets")
+# library("plotly")
+# library("htmlwidgets")
 library("extrafont")
 library("stringr")
 library("org.Sc.sgd.db")
@@ -31,10 +30,9 @@ study_info_file <- args[1]
 ontology_file <- args[2]
 sgd_terms_tfile <- args[3]
 all_sgd_terms_csv <- args[4]
-zscores_file <- args[5]
-base_dir <- args[6]
-output_dir <- args[7]
-study_nums <- args[8:length(args)]
+base_dir <- args[5]
+output_dir <- args[6]
+study_nums <- args[7:length(args)]

 # Import standard tables used in Sean's code That should be copied to each ExpStudy
 labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
@@ -52,7 +50,7 @@ XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2])

 # Load input files
 for (study_num in study_nums) {
-  input_file <- file.path(base_dir, paste("Exp", study_num), zscores_file)
+  input_file <- file.path(base_dir, paste("Exp", study_num), "zscores", "zscores_interaction.csv")
  if (file.exists(input_file)) {
    assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE))
    assign(paste(Name, study_num), labels[study_num, 2])
@@ -206,10 +204,10 @@ if (length(study_nums) > 1) {
  try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
  X_heatmap <-
    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
-    colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
-    colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
-    colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
-    colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
  
  X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)]
  colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
@@ -226,12 +224,12 @@ if (length(study_nums) > 2) {
  
  X_heatmap <-
    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
-    colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
-    colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
-    colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
-    colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
-    colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
-    colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]

  # Reorder columns
  X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)]
@@ -252,14 +250,14 @@ if (length(study_nums) > 3) {
  
  X_heatmap <-
    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
-    colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
-    colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
-    colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
-    colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
-    colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
-    colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
-    colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
-    colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
+      colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]

  # Reorder columns
  X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)]
@@ -283,16 +281,16 @@ if (length(study_nums) > 4) {
  
  X_heatmap <-
    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
-    colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
-    colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
-    colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
-    colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
-    colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
-    colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
-    colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
-    colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
-    colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
-    colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
+      colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
+      colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
+      colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]

  # Reorder columns
  X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)]
@@ -441,7 +439,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size > 2000) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 45, onefile = TRUE)
+
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 45,
+      onefile = TRUE
+    )
+
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -461,7 +466,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -470,7 +475,14 @@ for (s in 1:dim(XX3)[1]) {
  }
  
  if (Parent_Size >= 1000 && Parent_Size <= 2000) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 35, onefile = TRUE)
+
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 35,
+      onefile = TRUE
+    )
+
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -490,7 +502,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -499,7 +511,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 500 && Parent_Size <= 1000) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 30, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 30,
+      onefile = TRUE
+    )
+
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -519,7 +538,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -528,7 +547,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 200 && Parent_Size <= 500) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 25, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 25,
+      onefile = TRUE
+    )
+
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -548,7 +574,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -557,7 +583,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 100 && Parent_Size <= 200) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 20, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 20,
+      onefile = TRUE
+    )
+    
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -577,7 +610,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -586,7 +619,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 60 && Parent_Size <= 100) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 15, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 15,
+      onefile = TRUE
+    )
+    
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -606,7 +646,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -615,7 +655,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 30 && Parent_Size <= 60) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 10, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 10,
+      onefile = TRUE
+    )
+    
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -650,7 +697,7 @@ for (s in 1:dim(XX3)[1]) {
          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
          na.color = "red", col = brewer.pal(11, "PuOr"),
          main = GO_Term_Name,
-          #ColSideColors = ev_repeat,
+          # ColSideColors = ev_repeat,
          labRow = as.character(Genes_Annotated_to_Term$Gene)
        ))
      }
@@ -660,7 +707,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size >= 3 && Parent_Size <= 30) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 7,
+      onefile = TRUE
+    )
+    
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
@@ -704,7 +758,14 @@ for (s in 1:dim(XX3)[1]) {
  }

  if (Parent_Size == 2) {
-    pdf(file = paste(output_dir, XX3[s, 2], ".pdf", sep = ""), width = 12, height = 7, onefile = TRUE)
+    
+    pdf(
+      file = file.path(output_dir, paste(XX3[s, 2], ".pdf", sep = "")),
+      width = 12,
+      height = 7,
+      onefile = TRUE
+    )
+    
    for (i in 1:length(GOTerm_parent)) {
      GO_Term <- GOTerm_parent[i]
      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
--- a/workflow/apps/r/createHeatMapsHomology.R
+++ b/workflow/apps/r/createHeatMapsHomology.R
@@ -1,28 +1,27 @@
 #!/usr/bin/env Rscript
-# This script will make homology heatmaps for the REMc analysis
-# This script didn't have any hard set inputs so I didn't bother

-library(RColorBrewer)
-library(gplots)
-library(tidyverse)
+library("RColorBrewer")
+library("gplots")
+library("tidyverse")

 args <- commandArgs(TRUE)
-# Need to give the input "finalTable.csv" file after running REMc generated by eclipse
-inputFinalTable <- file.path(args[1])
-
-# Give the DAmP_list.txt as the third argument - will color the gene names differently
-DAmPs <- file.path(Args[2])
-DAmP_list <- read.delim(file = DAmPs, header = FALSE, stringsAsFactors = FALSE)
-
-# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps
-mapFile <- file.path(Args[3])
-mapping <- read.csv(file = mapFile, stringsAsFactors = FALSE)

 # Define the output path for the heatmaps - create this folder first - in linux terminal in the working folder use > mkdir filename_heatmaps
-outputPath <- file.path(Args[4])
+output_path <- file.path(args[1])
+
+# Second argument: the input "finalTable.csv" file generated by eclipse after running REMc
+final_table <- file.path(args[2])
+
+# Third argument: the damp_list.txt file - used to color the gene names differently
+damps <- file.path(args[3])
+damp_list <- read.delim(file = damps, header = FALSE, stringsAsFactors = FALSE)
+
+# Fourth argument: the yeast human homology mapping - its genes are added to the finalTable and the info is used for heatmaps
+map_file <- file.path(args[4])
+mapping <- read.csv(file = map_file, stringsAsFactors = FALSE)

 # Read in finalTablewithShift
-hmapfile <- data.frame(read.csv(file = inputFinalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
+hmapfile <- data.frame(read.csv(file = final_table, header = TRUE, sep = ",", stringsAsFactors = FALSE))

 # Map the finalTable to the human homolog file
 hmapfile_map <- hmapfile
@@ -46,11 +45,11 @@ hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ense
 hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$likelihood) == FASLE, ]

 # Write csv with all info from mapping file
-write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologAll.csv", sep = "")), row.names = FALSE)
+write.csv(hmapfile_w_homolog, file.path(output_path, paste(final_table, "_WithHomologAll.csv", sep = "")), row.names = FALSE)

 # Remove the non matches and output another mapping file - this is also one used to make heatmaps
 hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$external_gene_name_Human) == FALSE, ]
-write.csv(hmapfile_w_homolog, file.path(outputPath, paste(inputFinalTable, "_WithHomologMatchesOnly.csv", sep = ""), row.names = FALSE))
+write.csv(hmapfile_w_homolog, file.path(output_path, paste(final_table, "_WithHomologMatchesOnly.csv", sep = "")), row.names = FALSE)

 # Add human gene name to the Gene column
 hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/")
@@ -176,14 +175,14 @@ if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) {
 # m <- 0
 colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 3)])

-colnames(DAmP_list)[1] <- "ORF"
-hmapfile$DAmPs <- "YKO"
+colnames(damp_list)[1] <- "ORF"
+hmapfile$damps <- "YKO"
 colnames(hmapfile)[2] <- "ORF"
-try(hmapfile[hmapfile$ORF %in% DAmP_list$ORF, ]$DAmPs <- "YKD")
-# X <- X[order(X$DAmPs,decreasing = TRUE),]
+try(hmapfile[hmapfile$ORF %in% damp_list$ORF, ]$damps <- "YKD")
+# X <- X[order(X$damps,decreasing = TRUE),]
 hmapfile$color2 <- NA
-try(hmapfile[hmapfile$DAmPs == "YKO", ]$color2 <- "black")
-try(hmapfile[hmapfile$DAmPs == "YKD", ]$color2 <- "red")
+try(hmapfile[hmapfile$damps == "YKO", ]$color2 <- "black")
+try(hmapfile[hmapfile$damps == "YKD", ]$color2 <- "red")

 hmapfile$color <- NA
 try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_many2many", ]$color <- "#F8766D")
@@ -231,7 +230,7 @@ for (i in 1:num_unique_clusts) {
  if (cluster_length != 1) {
    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)])
    if (cluster_length >= 2001) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 20, width = 15)
      heatmap.2(
        x = X0,
@@ -251,7 +250,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length >= 201 && cluster_length <= 2000) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 15, width = 12)
      heatmap.2(
        x = X0,
@@ -270,7 +269,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length >= 150 && cluster_length <= 200) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 12, width = 12)
      heatmap.2(
        x = X0,
@@ -288,7 +287,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length >= 101 && cluster_length <= 149) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 12, width = 12)
      heatmap.2(
        x = X0,
@@ -306,7 +305,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length >= 60 && cluster_length <= 100) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 12, width = 12)
      heatmap.2(
        x = X0,
@@ -324,7 +323,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length <= 59 && cluster_length >= 30) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 9, width = 12)
      heatmap.2(
        x = X0,
@@ -342,7 +341,7 @@ for (i in 1:num_unique_clusts) {
      dev.off()
    }
    if (cluster_length <= 29) {
-      mypath <- file.path(outputPath, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
+      mypath <- file.path(output_path, paste("cluster_", gsub(" ", "", cluster), ".pdf", sep = ""))
      pdf(file = mypath, height = 7, width = 12)
      heatmap.2(
        x = X0,
--- a/workflow/apps/r/gtaTemplate.R
+++ b/workflow/apps/r/gtaTemplate.R
@@ -50,7 +50,7 @@ if (length(args) >= 5) {
 # ZScores_Interaction.csv
 for (m in 1:length(zscores_file)) {
  
-  #zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="")  #ArgsScore[1]
+  # zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="")  #ArgsScore[1]
  X <- read.csv(file = zscores_file[m], stringsAsFactors = FALSE, header = TRUE)
  
  if (colnames(X)[1] == "OrfRep") {
--- a/workflow/apps/r/interactions.R
+++ b/workflow/apps/r/interactions.R
--- a/workflow/apps/r/joinInteractExps.R
+++ b/workflow/apps/r/joinInteractExps.R
@@ -1,44 +1,45 @@
 #!/usr/bin/env Rscript
 # JoinInteractExps.R

-library(plyr)
-library(sos)
-library(dplyr)
+library("plyr")
+library("sos")
+library("dplyr")

 args <- commandArgs(TRUE)

 # Set output dir
 if (length(args) >= 1) {
-  outDir <- file.path(args[1])
+  out_dir <- file.path(args[1])
 } else {
-  outDir <- "./" # for legacy workflow
+  out_dir <- "./" # for legacy workflow
 }

 # Set sd value
 if (length(args) >= 2) {
-  sd <- args[2]
+  sd <- as.numeric(args[2])
 } else {
  sd <- 2 # default value
 }
-print(paste("SD=", sd))

-# Set studyInfo file
+sprintf("SD value is: %f", sd)
+
+# Set study_info file
 if (length(args) >= 3) {
-  studyInfo <- file.path(args[3])
+  study_info <- file.path(args[3])
 } else {
-  studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
+  study_info <- "../Code/StudyInfo.csv" # for legacy workflow
 }

 studies <- args[3:length(args)]
-inputFiles <- c()
+input_files <- c()
 for (study in 1:length(studies)) {
-  zsFile <- file.path(study, "zscores", "zscores_interaction.csv")
-  if (file.exists(zsFile)) {
-    inputFiles[study] <- zsFile
+  zs_file <- file.path(studies[study], "zscores", "zscores_interaction.csv")
+  if (file.exists(zs_file)) {
+    input_files[study] <- zs_file
  }
 }

-print(length(inputFiles))
+print(length(input_files))

 # TODO this is better handled in a loop in case you want to compare more experiments?
 # The input is already  designed for this
@@ -46,38 +47,38 @@ print(length(inputFiles))
 # Join the two files at a time as a function of how many inputFile
 # list the larger file first ? in this example X2 has the larger number of genes
 # If X1 has a larger number of genes, switch the order of X1 and X2
-if (length(inputFiles) == 2) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE)
+if (length(input_files) == 2) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
-  headSel <- select(headSel, -"Gene.1") #remove "Gene.1 column
+  headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
-  headSel2 <- select(headSel2, -"Gene.1") #remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
-} else if (length(inputFiles) == 3) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
-  X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
+  headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
+} else if (length(input_files) == 3) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
+  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  #OrderByHeader
+  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
  headSel <- select(headSel, -"Gene.1", -"Gene.2")
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
  
-} else if (length(inputFiles) == 4) {
-  X1 <- read.csv(file = inputFiles[1], stringsAsFactors = FALSE) #exp1File,stringsAsFactors = FALSE)
-  X2 <- read.csv(file = inputFiles[2], stringsAsFactors = FALSE) #exp2File,stringsAsFactors = FALSE)
-  X3 <- read.csv(file = inputFiles[3], stringsAsFactors = FALSE) #exp3File,stringsAsFactors = FALSE)
-  X4 <- read.csv(file = inputFiles[4], stringsAsFactors = FALSE) #exp4File,stringsAsFactors = FALSE)
+} else if (length(input_files) == 4) {
+  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
+  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
+  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
  X <- join(X, X4, by = "OrfRep")
-  OBH <- X[, order(colnames(X))]  #OrderByHeader
+  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
    contains("Z_lm_K"), contains("Z_Shift_K"), contains("Z_lm_L"), contains("Z_Shift_L"))
  headSel <- select(headSel, -"Gene.1", -"Gene.2", -"Gene.3")
@@ -221,13 +222,13 @@ if (std == 0) {
 # R places hidden "" around the header names. The following
 # is intended to remove those quote so that the "" do not blow up the Java REMc.
 # Use ,quote=F in the write.csv statement to fix R output file.
-# write.csv(combI,file.path(outDir,"CombinedKLzscores.csv"), row.names = FALSE)
-write.csv(REMcRdy, file.path(outDir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
-write.csv(shiftOnly, file.path(outDir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
+write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 #LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")

-LabelStd <- read.csv(file = studyInfo, stringsAsFactors = FALSE)
+LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
 print(std)
 LabelStd[, 4] <- as.numeric(std)
-write.csv(LabelStd, file = file.path(outDir, "parameters.csv"), row.names = FALSE)
-write.csv(LabelStd, file = studyInfo, row.names = FALSE)
+write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
+write.csv(LabelStd, file = study_info, row.names = FALSE)