Rollup before removing NAs from joinInteractExps.R

2024-08-15 15:02:53 -04:00
parent 6992d5eec0
commit 38b3f66695
3 changed files with 169 additions and 117 deletions
--- a/workflow/apps/r/interactions.R
+++ b/workflow/apps/r/interactions.R
@@ -27,7 +27,7 @@ sgd_gene_list <- file.path(args[4])
 input_file <- file.path(args[5])
 out_dir <- file.path(args[6])
-sprintf("The Standard Deviation value is: %f", delta_bg_factor)
+sprintf("The Standard Deviation value is: %d", delta_bg_factor)
 out_dir_qc <- file.path(out_dir, "qc")
--- a/workflow/apps/r/joinInteractExps.R
+++ b/workflow/apps/r/joinInteractExps.R
@@ -21,7 +21,7 @@ if (length(args) >= 2) {
  sd <- 2 # default value
 }
-sprintf("SD value is: %f", sd)
+sprintf("SD value is: %d", sd)
 # Set study_info file
 if (length(args) >= 3) {
@@ -30,15 +30,23 @@ if (length(args) >= 3) {
  study_info <- "../Code/StudyInfo.csv" # for legacy workflow
 }
-studies <- args[3:length(args)]
+studies <- args[4:length(args)]
 print(studies)
 input_files <- c()
-for (study in 1:length(studies)) {
+for (i in seq_along(studies)) {
  study <- studies[i]
  zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
  if (file.exists(zs_file)) {
-    input_files[study] <- zs_file
+    input_files[i] <- zs_file
  }
 }
 rm(zs_file, study)
 for (var in ls()) {
  print(paste(var, ":", get(var)))
 }
 print(input_files)
 print(length(input_files))
 # TODO this is better handled in a loop in case you want to compare more experiments?
@@ -47,7 +55,12 @@ print(length(input_files))
 # Join the two files at a time as a function of how many inputFile
 # list the larger file first ? in this example X2 has the larger number of genes
 # If X1 has a larger number of genes, switch the order of X1 and X2
-if (length(input_files) == 2) {
+if (length(input_files) == 1) {
  print("Only one experiment to compare, skipping join")
  stop("Exiting script")
 }
 if (length(input_files) >= 2) {
  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
@@ -57,11 +70,10 @@ if (length(input_files) == 2) {
  headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
  headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column   #Frame for interleaving Z_lm with Shift colums
-} else if (length(input_files) == 3) {
+}
-  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+
-  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
+if (length(input_files) >= 3) {
-  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
+  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
@@ -69,14 +81,10 @@ if (length(input_files) == 2) {
  headSel <- select(headSel, -"Gene.1", -"Gene.2")
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
 }
-} else if (length(input_files) == 4) {
+if (length(input_files) >= 4) {
-  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
+  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
  X <- join(X1, X2, by = "OrfRep")
  X <- join(X, X3, by = "OrfRep")
  X <- join(X, X4, by = "OrfRep")
  OBH <- X[, order(colnames(X))]  # OrderByHeader
  headSel <- select(OBH,  contains("OrfRep"), matches("Gene"),
@@ -103,7 +111,7 @@ REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("Z_lm_
 shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
 # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
-Labels <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE, sep = ",")
+Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
 # Using Text search grepl to relabel headers
 REMcRdyHdr <- colnames(REMcRdy)
@@ -168,10 +176,10 @@ Vec7 <- NA
 Vec8 <- NA
 if (length(REMcRdy) == 6) {
-  Vec1 <- abs(REMcRdy[, 3]) >= std
+  Vec1 <- abs(REMcRdy[, 3]) >= sd
-  Vec2 <- abs(REMcRdy[, 4]) >= std
+  Vec2 <- abs(REMcRdy[, 4]) >= sd
-  Vec3 <- abs(REMcRdy[, 5]) >= std
+  Vec3 <- abs(REMcRdy[, 5]) >= sd
-  Vec4 <- abs(REMcRdy[, 6]) >= std
+  Vec4 <- abs(REMcRdy[, 6]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
@@ -180,12 +188,12 @@ if (length(REMcRdy) == 6) {
 }
 if (length(REMcRdy) == 8) {
-  Vec1 <- abs(REMcRdy[, 3]) >= std
+  Vec1 <- abs(REMcRdy[, 3]) >= sd
-  Vec2 <- abs(REMcRdy[, 4]) >= std
+  Vec2 <- abs(REMcRdy[, 4]) >= sd
-  Vec3 <- abs(REMcRdy[, 5]) >= std
+  Vec3 <- abs(REMcRdy[, 5]) >= sd
-  Vec4 <- abs(REMcRdy[, 6]) >= std
+  Vec4 <- abs(REMcRdy[, 6]) >= sd
-  Vec5 <- abs(REMcRdy[, 7]) >= std
+  Vec5 <- abs(REMcRdy[, 7]) >= sd
-  Vec6 <- abs(REMcRdy[, 8]) >= std
+  Vec6 <- abs(REMcRdy[, 8]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
@@ -194,14 +202,14 @@ if (length(REMcRdy) == 8) {
 }
 if (length(REMcRdy) == 10) {
-  Vec1 <- abs(REMcRdy[, 3]) >= std
+  Vec1 <- abs(REMcRdy[, 3]) >= sd
-  Vec2 <- abs(REMcRdy[, 4]) >= std
+  Vec2 <- abs(REMcRdy[, 4]) >= sd
-  Vec3 <- abs(REMcRdy[, 5]) >= std
+  Vec3 <- abs(REMcRdy[, 5]) >= sd
-  Vec4 <- abs(REMcRdy[, 6]) >= std
+  Vec4 <- abs(REMcRdy[, 6]) >= sd
-  Vec5 <- abs(REMcRdy[, 7]) >= std
+  Vec5 <- abs(REMcRdy[, 7]) >= sd
-  Vec6 <- abs(REMcRdy[, 8]) >= std
+  Vec6 <- abs(REMcRdy[, 8]) >= sd
-  Vec7 <- abs(REMcRdy[, 9]) >= std
+  Vec7 <- abs(REMcRdy[, 9]) >= sd
-  Vec8 <- abs(REMcRdy[, 10]) >= std
+  Vec8 <- abs(REMcRdy[, 10]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
@@ -209,12 +217,12 @@ if (length(REMcRdy) == 10) {
  shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
 }
-if (std != 0) {
+if (sd != 0) {
  REMcRdy <- REMcRdyGT2  # [,2:length(REMcRdyGT2)]
  shiftOnly <- shiftOnlyGT2  # [,2:length(shiftOnlyGT2)]
 }
-if (std == 0) {
+if (sd == 0) {
  REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
  shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
 }
@@ -228,7 +236,7 @@ write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, qu
 #LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
 LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
-print(std)
+print(sd)
-LabelStd[, 4] <- as.numeric(std)
+LabelStd[, 4] <- as.numeric(sd)
 write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
 write.csv(LabelStd, file = study_info, row.names = FALSE)
--- a/workflow/qhtcp-workflow
+++ b/workflow/qhtcp-workflow
@@ -139,7 +139,7 @@ print_help() {
 # `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
 # @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
 # @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
-# @option -w<value> | --wrapper=<value> Requires two arguments: the name of the wrapper and its arguments, can be passed multiple times
+# @option -w<value> | --wrapper=<value> One or more wrappers and their arguments to run, can be passed multiple times or with a comma-separated string
 # @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
 # @option --markdown Generate the shdoc markdown file for this program
 # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
@@ -163,20 +163,22 @@ parse_input() {
      case $1 in
        --project|-p)
            shift
            declare -ga PROJECTS
            IFS=',' read -ra PROJECTS <<< "$1"
            ;;
        --module|-m)
            shift
            declare -ga MODULES
            IFS=',' read -ra MODULES <<< "$1"
            ;;
        --wrapper|-w)
            shift
-            IFS=',' read -ra WRAPPERS <<< "$1"
+            declare -ga WRAPPERS
            shift
            IFS=',' read -ra WRAPPERS <<< "$1"
            ;;
        --nomodule|-n)
            shift
            declare -ga EXCLUDE_MODULES
            IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
            ;;
        --markdown)
@@ -336,6 +338,10 @@ execute() {
 # @description Backup one or more files to an incremented .bk file
 #
 # **TODO**
 #
 # * Make backups hidden by prepending "."?
 #
 # @exitcode backup iterator max 255
 # @internal
 backup() {
@@ -343,8 +349,8 @@ backup() {
  for f in "$@"; do
    [[ -e $f ]] || continue
    count=1
-    while [[ -f $f.bk.$count ]]; do
+    while [[ -e $f.bk.$count ]]; do
-      count=$((count++))
+      ((count++))
    done
    echo "Backing up $f to $f.bk.$count"
    debug "rsync -a $f $f.bk.$count"
@@ -525,7 +531,7 @@ interactive_header() {
  echo ""
  # Module selection
-  if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then
+  if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
    cat <<-EOF
 			${underline}Enter modules(s) to run${nounderline}
 			* <Enter> for all
@@ -554,7 +560,7 @@ interactive_header() {
  # If we're just installing dependencies, skip the rest
  [[ ${MODULES[*]} == "install_dependencies" ]] && return 0
-  # Submodule selection
+  # Wrapper selection
  if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
    while :; do
      cat <<-EOF
@@ -1335,7 +1341,7 @@ qhtcp() {
  [[ -d $QHTCP_RESULTS_DIR ]] || 
    err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
-  # Sets STUDIES_NUMS and STUDIES_DIRS
+  # Sets STUDIES
  study_info
  choose_easy_results "$EASY_OUT_DIR"
@@ -1348,22 +1354,24 @@ qhtcp() {
  # # TODO Add them all to StudiesDataArchive?
  # # Probably better to always add and remove dupes later since each invocation "counts"?
  # for f in "${EASY_RESULTS_FILES[@]}"; do
-  #   for s in "${STUDIES_NUMS[@]}"; do
+  #   for study in "${STUDIES[@]}"; do
  #     read -r num sd dir <<< "$study"
  #     # Trying to match old ExpFrontend formatting
  #     printf "%s\t" \
-  #       "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$s" \
+  #       "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$num" \
  #       "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
  #       >> "$STUDIES_ARCHIVE_FILE"
  #   done
  # done
  # Run R interactions script on all studies
-  for s in "${STUDIES_NUMS[@]}"; do
+  for study in "${STUDIES[@]}"; do
-    [[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] ||
+    read -r num sd dir <<< "$study"
-      execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores"
+    [[ -d $dir/zscores ]] ||
-    [[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] ||
+      execute mkdir "$dir/zscores"
-      execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc"
+    [[ -d $dir/zscores/qc ]] ||
-    r_interactions "$s"
+      execute mkdir "$dir/zscores/qc"
    r_interactions "$num" "$sd"
  done \
  && remc \
  && gtf \
@@ -1384,13 +1392,12 @@ module remc
 remc() {
  debug "Running: ${FUNCNAME[0]}"
-  # Sets STUDIES_NUMS and STUDIES_DIRS
+  # Sets STUDIES
  study_info
  # If any wrappers fail the rest will not run, this is fundamental to module design
  # Remove leading && to run regardless
  r_join_interactions \
    "${STUDIES_DIRS[@]}" \
  && java_extract \
  && r_add_shift_values \
  && r_create_heat_maps \
@@ -1453,36 +1460,40 @@ module gta
 gta() {
  debug "Running: ${FUNCNAME[0]}"
-  gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
+  # gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
  gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
  sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
  all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"  
 # TODO This could be wrong, it could be in main results
-  # Sets STUDIES_NUMS and STUDIES_DIRS
+  # Sets STUDIES
  study_info
  [[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
  execute mkdir "$GTA_OUT_DIR"
  # Loop over the array and create pairwise arrays
-  for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
+  for ((i=0; i<${#STUDIES[@]}; i++)); do
-    for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do
+    for ((j=i+1; j<${#STUDIES[@]}; j++)); do
-      pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}")
+      read -r num1 _ _ <<< "${STUDIES[i]}"
      read -r num2 _ _ <<< "${STUDIES[j]}"
      pair=("$num1" "$num2")
      echo "${pair[@]}"
    done
  done
  # Create unique parwise combinations of study nums from dir names
  study_combos=()
-  for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
+  for ((i=0; i<${#STUDIES[@]}; i++)); do
    # Loop through the array again
-    for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do
+    for ((j=0; j<${#STUDIES[@]}; j++)); do
      # If the indices are not the same
      if [ "$i" != "$j" ]; then
        # Print the unique combination
-        study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}")
+        read -r num1 _ _ <<< "${STUDIES[i]}"
        read -r num2 _ _ <<< "${STUDIES[j]}"
        study_combos+=("$num1,$num2")
      fi
    done
  done
@@ -1490,11 +1501,12 @@ gta() {
  # The following are three types of studies
  # Individual studies
-  for s in "${STUDIES_NUMS[@]}"; do
+  for study in "${STUDIES[@]}"; do
-    zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file"
+    read -r num _ dir <<< "$study"
    zscores_file="$dir/zscores/zscores_interaction.csv"
    if [[ -f $zscores_file ]]; then
-      mkdir "$GTA_OUT_DIR/Exp$s"
+      mkdir "$GTA_OUT_DIR/Exp$num"
-      r_gta "Exp$s" "$zscores_file"
+      r_gta "Exp$num" "$zscores_file"
    fi
  done
@@ -1507,6 +1519,12 @@ gta() {
  # All studies
  # All preceding arguments are required so we can pass multiple studies
  declare -a nums
  for study in "${STUDIES[@]}"; do
    read -r num _ _ <<< "$study"
    nums+=("$num")
  done
  r_gta_heatmaps \
    "$STUDY_INFO_FILE" \
    "$gene_ontology_obo" \
@@ -1514,7 +1532,7 @@ gta() {
    "$all_sgd_terms_csv" \
    "$QHTCP_RESULTS_DIR" \
    "$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
-    "${STUDIES_NUMS[@]}"
+    "${nums[@]}"
 }
@@ -1714,7 +1732,7 @@ wrapper r_interactions
 # @arg $3 string study info file
 # @arg $4 string SGD_features.tab
 # @arg $5 string easy/results_std.txt
-# @arg $6 string zscores directory
+# @arg $6 string output directory
 r_interactions() {
  debug "Running: ${FUNCNAME[0]} $*"
  cat <<-EOF
@@ -1729,7 +1747,11 @@ r_interactions() {
 		  * Background values are reported in the results sheet and so could also be analyzed there.
 	EOF
-  script="$APPS_DIR/r/interactions.R"
+  declare script="$APPS_DIR/r/interactions.R"
  declare out_dir="${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}"
  [[ -d $out_dir ]] && backup "$out_dir"
  mkdir "$out_dir"
  execute "$RSCRIPT" "$script" \
    "$1" \
@@ -1737,12 +1759,15 @@ r_interactions() {
    "${3:-"$STUDY_INFO_FILE"}" \
    "${4:-"$APPS_DIR/r/SGD_features.tab"}" \
    "${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
-    "${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \
+    "$out_dir" \
    "${@:7}" # future arguments
  [[ -f "$out_dir/zscores_interaction.csv" ]] || (echo "$out_dir/zscores_interaction.csv does not exist"; return 1)
 }
 wrapper r_join_interactions
 # shellcheck disable=SC2120
 # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
 #
 # TODO
@@ -1760,20 +1785,41 @@ wrapper r_join_interactions
 # * Shift_only.csv
 # * parameters.csv
 #
-# @arg $1 string output directory
+# @arg $1 string output directory (required)
-# @arg $2 string sd value (default: 2)
+# @arg $2 string sd value (default: 2 when empty or omitted)
-# @arg $3 string study info file
+# @arg $3 string study info file (required)
 # @arg $4 array studies (required) 
 r_join_interactions() {
  debug "Running: ${FUNCNAME[0]} $*"
-  script="$APPS_DIR/r/joinInteractExps.R"
+  declare script="$APPS_DIR/r/joinInteractExps.R"
  declare -a dirs
  declare -a out_files=(
    "${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv"
    "${1:-$QHTCP_RESULTS_DIR}/Shift_only.csv" 
    "${1:-$QHTCP_RESULTS_DIR}/parameters.csv"
  )
  ((DEBUG)) && declare -p
  backup "${out_files[@]}"
  # If user provides study dirs, use those
  if [[ $# -gt 3 ]]; then
    dirs=("${@:4}")
  else
    study_info
    for study in "${STUDIES[@]}"; do
      read -r _ _ dir <<< "$study"
      dirs+=("$dir") 
    done
  fi
  execute "$RSCRIPT" "$script" \
    "${1:-$QHTCP_RESULTS_DIR}" \
    "${2:-2}" \
    "${3:-$STUDY_INFO_FILE}" \
-    "${@:4:-${STUDIES_DIRS[@]}}"
+    "${dirs[@]}"
  local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv")
  for f in "${out_files[@]}"; do
    [[ -f $f ]] || (echo "$f does not exist"; return 1)
  done
@@ -1816,6 +1862,9 @@ java_extract() {
    "${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
    "${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
    "${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
    1
    true
    true
  )
  debug "pushd && ${java_cmd[*]} && popd"
@@ -2021,8 +2070,7 @@ r_compile_gtf() {
 #
 # @exitcode 0 If one or more studies found
 # @exitcode 1 If no studies found
-# @set STUDIES_NUMS array contains Exp numbers
+# @set STUDIES array contains one space-separated "Exp# sd ExpDir" string per study
 # @set STUDIES_DIRS array contains Exp directories
 study_info() {
  debug "Running: ${FUNCNAME[0]}"
@@ -2116,26 +2164,28 @@ study_info() {
  fi
  # Read study info file
-  while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
+  declare -ga STUDIES
-    STUDIES_NUMS+=("$col1")
+  while IFS=',' read -r num _ sd _; do
    STUDIES+=("$num $sd $QHTCP_RESULTS_DIR/Exp$num")
  done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
  # Initialize missing Exp dirs
-  STUDIES_DIRS=()
+  for study in "${STUDIES[@]}"; do
-  for s in "${STUDIES_NUMS[@]}"; do
+    read -r _ _ dir <<< "$study"
-    study_dir="$QHTCP_RESULTS_DIR/Exp$s"
+    [[ -d $dir ]] || mkdir "$dir"
    STUDIES_DIRS+=("$study_dir")
    [[ -d $study_dir ]] || mkdir "$study_dir"
    # We don't need a template anymore?
    # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
    #   err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
    #   continue
    # fi
  done
  #   # We don't need a template anymore?
  #   # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
  #   #   err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
  #   #   continue
  #   # fi
  # done
  ((DEBUG)) && declare -p STUDIES
  # Return true if at least one study was found
-  [[ ${#STUDIES_NUMS[@]} -gt 0 ]]
+  [[ ${#STUDIES[@]} -gt 0 ]]
 }
@@ -2287,6 +2337,8 @@ main() {
  parse_input "$@" # parse arguments with getopt
  # ((DEBUG)) && declare -p
  interactive_header "$@"
  # # Prompt user for the PROJECT if we still don't have one
@@ -2345,14 +2397,8 @@ main() {
    declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
    declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
    declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
-    if ((DEBUG)); then
+    
-      echo "Debug:"
+    # ((DEBUG)) && declare -p
      declare -p SCANS_DIR OUT_DIR TEMPLATES_DIR APPS_DIR \
      PROJECTS PROJECT_NAME \
      PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
      STUDIES_ARCHIVE_FILE QHTCP_RESULTS_DIR QHTCP_TEMPLATE_DIR \
      STUDY_INFO_FILE EASY_RESULTS_DIR R_LIBS_USER
    fi
    debug "Active modules: ${MODULES[*]}"
    debug "Active wrappers and their args: ${WRAPPERS[*]}"
@@ -2365,21 +2411,19 @@ main() {
    done
    # Run selected wrappers
-    for i in "${!WRAPPERS[@]}"; do
+    for wrapper in "${WRAPPERS[@]}"; do
-      IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args
+      IFS=',' read -ra args <<< "$wrapper" # load the command args
-      if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then
+      if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
-        "${WRAPPERS[i]}" "${args[@]}" || return 1
+        "${args[0]}" "${args[@]:1}" || return 1
      fi
      continue 2 # skip the command string
    done
  done
-  cat <<-EOF
+  [[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
-		Successfully ran module(s): ${MODULES[*]}
+  [[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
-		And wrapper(s): ${WRAPPERS[*]}
+  [[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
-		On project(s): ${PROJECTS[*]}
+
-	EOF
+  unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES SET_STUDIES YES
  unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES
 }
 # (Safe) main loop