More progress on initial commit
@@ -1,7 +1,9 @@
#!/usr/bin/env bash
# Copyright 2024 Bryan C. Roessler
# This is currently a code scratchpad for organizing the Hartman Lab Server workflow
#
# This is a code scratchpad for organizing the Hartman Lab Server workflow
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
#
# Allow indirect functions
# shellcheck disable=SC2317
#
@@ -9,8 +11,14 @@
# @name HartmanLabWorkflow
# @brief One script to rule them all (see: xkcd #927)
# @description Executes the Hartman Lab image analysis workflow
# @arg $1 string A project name
#
# @option -p<value> | --project=<value> Include one or more projects in the analysis
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
# @option -d | --debug Turn on extra debugging output
# @option -h | --help Print help message and exit (overrides other options)

DEBUG=1 # Turn debugging ON by default during development
shopt -s extglob

# @section Libraries
@@ -22,42 +30,53 @@ PERL="${PERL:-perl}"
# @section Help
# @description Print a helpful message
print_help() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"

install_dependencies --get-depends # Loads the dependency arrays

cat <<-EOF
USAGE:
script-run-workflow [[OPTION] [VALUE]]...

Some options (--project, --module) can be passed multiple times for batch operations.
Some options (--project, --include, --exclude) can be passed multiple times or
by using comma-delimited strings (see EXAMPLES below)

OPTIONS:
--project, -p PROJECT
PROJECT should follow the pattern ${PROJECT_PREFIX}_UNIQUE_PROJECT_NAME
--include, -i MODULE
See MODULES section below for list of available modules
If no --module is specified, all modules are run
--exclude, -x MODULE
See MODULES section below for list of available modules
--yes, -y, --auto
Always answer yes to questions (non-interactive mode)
--debug, -d
Print extra debugging info
--help, -h
Print this help message and exit
--project, -p PROJECT
PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME
--include, -i MODULE
See MODULES section below for list of available modules
If no --include is specified, all modules are run
--exclude, -x MODULE
See MODULES section below for list of modules to exclude
--yes, -y, --auto
Always answer yes to questions (non-interactive mode)
--debug, -d
Print extra debugging info
--help, -h
Print this help message and exit

MODULES:
${ALL_MODULES[*]}

SUBMODULES:
${ALL_SUBMODULES[*]}

DEPENDENCIES:
binaries (system): graphviz pandoc pdftk-java gd-devel
perl (cpan): File::Map ExtUtils::PkgConfig GD GO::TermFinder
R (default): BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc rmarkdown
R (BiocManager):
deb: ${depends_deb[@]}
rpm: ${depends_rpm[@]}
brew: ${depends_brew[@]}
perl: ${depends_perl[@]}
R: ${depends_r[@]}
BiocManager: ${depends_bioc[@]}

EXAMPLES:
script-run-workflow --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[1]} --include ${ALL_MODULES[2]} --yes
script-run-workflow --include=${ALL_MODULES[0]},${ALL_MODULES[1]}
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --include=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
EOF
}

@@ -65,10 +84,9 @@ print_help() {
# @section User Input
# @description Creates arrays and switches from user input
parse_input() {
echo "Running: ${FUNCNAME[0]}" "$@"
debug "Running: ${FUNCNAME[0]}" "$@"

long_opts="project:,include:,exclude:,yes,auto,debug,help"
#long_opts+="restorefile:,betapass:,"
short_opts="+p:i:x:yhd"
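# Note: the leading '+' in short_opts tells getopt to stop option parsing at the first non-option argument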

if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
@@ -77,15 +95,27 @@ parse_input() {
case $1 in
--project|-p)
shift
declare -ga PROJECT_NAMES+=("$1")
if [[ $1 == *','* ]]; then # split comma-delimited values
IFS=',' read -ra _vals <<< "$1"
PROJECTS+=("${_vals[@]}")
else
PROJECTS+=("$1")
fi
;;
--include|-i)
shift
declare -ga MODULES+=("$1")
if [[ $1 == *','* ]]; then # split comma-delimited values
IFS=',' read -ra _vals <<< "$1"
INCLUDE_MODULES+=("${_vals[@]}")
else
INCLUDE_MODULES+=("$1")
fi
;;
--exclude|-x)
shift
declare -ga EXCLUDE_MODULES+=("$1")
if [[ $1 == *','* ]]; then # split comma-delimited values
IFS=',' read -ra _vals <<< "$1"
EXCLUDE_MODULES+=("${_vals[@]}")
else
EXCLUDE_MODULES+=("$1")
fi
;;
--yes|-y|--auto)
declare -g YES=1
@@ -109,12 +139,118 @@ parse_input() {
}


# @section Helper functions
# @internal
module() {
debug "Adding $1 module"
ALL_MODULES+=("$1")
declare -gA "$1"
}
submodule() {
debug "Adding $1 submodule"
ALL_SUBMODULES+=("$1")
declare -gA "$1"
}
# This function will only work if users have an actual name registered on the server
# TODO for now just use username
# user_initials() {
# user_record="$(getent passwd "$(whoami)")"
# user_gecos_field="$(echo "$user_record" | cut -d ':' -f 5)"
# user_full_name="$(echo "$user_gecos_field" | cut -d ',' -f 1)"
# last="${user_full_name#* }"
# echo "${user_full_name:0:1}${last:0:1}"
# }
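# A possible implementation once real names are registered; the username fallback is an
# assumption for illustration, not project policy:
# user_initials() {
#     declare full_name first last
#     full_name="$(getent passwd "$(whoami)" | cut -d ':' -f 5 | cut -d ',' -f 1)"
#     if [[ -z $full_name ]]; then
#         whoami # no real name registered, fall back to the username
#         return
#     fi
#     first="${full_name%% *}"
#     last="${full_name##* }"
#     echo "${first:0:1}${last:0:1}"
# }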
ask() {
declare response
(( YES )) && return 0
read -r -p "$* [y/N]: " response
[[ ${response,,} =~ ^(yes|y)$ ]]
}
err() { echo "Error: $*" >&2; }
ask_pn() {
declare -g PROJECT
read -r -p "Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): " PROJECT
}
debug() { (( DEBUG )) && echo "Debug: $*"; }


# @section Modules
# @description A module contains a cohesive set of actions/experiments to run on a project
# Use a module when:
# * Building a new type of analysis
# * Combining submodules
#
#
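# A new module would be registered and defined like this (my_analysis is a
# hypothetical name, shown only to illustrate the pattern):
#
# module my_analysis
# # @description What the module does
# my_analysis() {
#     debug "Running: ${FUNCNAME[0]}"
#     # ...analysis steps or submodule calls...
# }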

module install_dependencies
# @description Installs dependencies for the workflow
install_dependencies() {
debug "Running: ${FUNCNAME[0]}" "$@"

# Dependency arrays
depends_rpm=(graphviz pandoc pdftk-java gd-devel)
depends_deb=(graphviz pandoc pdftk-java libgd-dev)
depends_brew=(graphviz pandoc gd pdftk-java)
depends_perl=(File::Map ExtUtils::PkgConfig GD GO::TermFinder)
depends_r=(BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc rmarkdown)
depends_bioc=(org.Sc.sgd.db)

[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars

# Install system-wide dependencies
echo "Installing system dependencies"
case "$(uname -s)" in
Linux*|CYGWIN*|MINGW*)
ask "Detected Linux platform, continue?" || return 1
echo "You may be prompted for your sudo password to install system packages"
if hash dnf &>/dev/null; then
sudo dnf install "${depends_rpm[@]}"
elif hash apt &>/dev/null; then
sudo apt install "${depends_deb[@]}"
fi
;;
Darwin*)
ask "Detected Mac platform, continue?" || return 1
export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh|bash
brew install "${depends_brew[@]}"
;;
*)
echo "Your system could not be detected, please install dependencies manually"
;;
esac

# Install perl CPAN modules
echo "Installing perl CPAN modules"
debug "cpan" "${depends_perl[@]}"
cpan "${depends_perl[@]}"

# Install R packages
echo "Installing R packages"

depends_r_str=""
depends_r_to_string() {
for d in "${depends_r[@]}"; do
depends_r_str+="$d\", \""
done
depends_r_str="${depends_r_str::-3}" # strip the trailing comma, space, and quote
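# After stripping, depends_r_str looks roughly like:
#   BiocManager", "ontologyIndex", ..., "rmarkdown"
# (the leading quote is supplied by the install.packages() call below)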
}
depends_r_to_string

debug "Rscript -e install.packages(c(\"$depends_r_str), dep=TRUE, repos=\"https://cloud.r-project.org\")"
Rscript -e "install.packages(c(\"$depends_r_str), dep=TRUE, repos=\"https://cloud.r-project.org\")"
Rscript -e "BiocManager::install(\"${depends_bioc[0]}\")"
}


module init_job
# @section Initialize a new job in the scans directory
# @description Create a new ExpJobs project
# TODO Copy over source image directories from robot - are these also named by the ExpJobs name?
init_job() {
echo "Running: ${FUNCNAME[0]}"

debug "Running: ${FUNCNAME[0]}"

if [[ -d $SCAN_DIR ]]; then
ask "$SCAN_DIR already exists, re-initialize?" || return 0
else
@@ -124,17 +260,17 @@ init_job() {

[[ -d $SCAN_DIR/MasterPlateFiles ]] || mkdir -p "$SCAN_DIR/MasterPlateFiles"

DRUG_MEDIA_FILE="$SCAN_DIR/MasterPlateFiles/DrugMedia_$PROJECT_NAME.xls"
MASTER_PLATE_FILE="$SCAN_DIR/MasterPlateFiles/MasterPlate_$PROJECT_NAME.xls"
DRUG_MEDIA_FILE="$SCAN_DIR/MasterPlateFiles/DrugMedia_$PROJECT.xls"
MASTER_PLATE_FILE="$SCAN_DIR/MasterPlateFiles/MasterPlate_$PROJECT.xls"

# TODO Where are the actual templates?
for f in $DRUG_MEDIA_FILE $MASTER_PLATE_FILE; do
touch "$f"
done

}


module easy
# @section EASY
# @description Start an EASY analysis
# The QHTCPImageFolders and ‘MasterPlateFiles’ folder are the inputs for image analysis with EASY software.
@@ -142,7 +278,7 @@ init_job() {
# The ‘Results’ directory is created and entered, using the “File >> New Experiment” dropdown in EASY.
# Multiple ‘Results’ files may be created (and uniquely named) within an ‘ExperimentJob’ folder.
easy() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240430AppExported/EstartConsole.m"

pushd "$SCAN_DIR" || return 1
@@ -172,22 +308,35 @@ easy() {
echo "EASY OUTPUT ARRAY: " "${EASY_OUT_ARRAY[@]}"
}


module ezview
# @section EZView
ezview() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
EZVIEW_DIR="/mnt/data/EZVIEW"
echo "$EZVIEW_DIR"
}


# @section StudiesQHTCP
# @description This section is derived from the earliest work of Jinyu Guo and therefore uses Perl scripts. Without porting these two Perl scripts into a new integrated R or Python script, one is constrained to the rather crude copy-paste and shell scripting inherent in the original procedures. The two Perl scripts are analyze_v2.pl and terms2tsv_v4.pl, which were written in 2003 by Gavin Sherlock for the SGD gene ontology system and require a Perl installation. They also require that the gene_ontology_edit.obo and SGD_features.tab files used in ../Code be included here. Without rewriting the code, one must compromise on directory convenience.

# @description Main loop for qhtcp modules (rewrite of REMcMaster3.sh)
module qhtcp
# @section QHTCP
# @description Main QHTCP module (functional rewrite of REMcMaster3.sh)
qhtcp() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
QHTCP_DIR="/mnt/data/StudiesQHTCP/$PROJECT_NAME"
QHTCP_DIR="/mnt/data/StudiesQHTCP/$PROJECT"

# Our list of submodules (functions) to run for this module
# Put these in the appropriate order of operations
submodules=(
r_join_interact
java_extract
r_add_shift_values
r_heat_maps_zscores
r_heat_maps_homology
py_gtf
r_compile_gtf
)

while [[ -d $QHTCP_DIR ]]; do
echo "A project already exists at $QHTCP_DIR"
@@ -202,24 +351,38 @@ qhtcp() {
echo "New project created at $QHTCP_DIR"
fi

# Create StudyInfo.csv
# Right now this is identical to the template but we can change it later
cat <<-EOF > "$QHTCP_DIR/Code/StudyInfo.csv"
ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
1,ExpName1,NA,NA,UserInitials
2,ExpName2,NA,NA,UserInitials
3,ExpName3,NA,NA,UserInitials
4,ExpName4,NA,NA,UserInitials
EOF

# Enter REMc directory to run the scripts there
pushd "$QHTCP_DIR/REMc" || return 1

r_join_interact &&
java_jingyu_extract &&
r_add_shift_values &&
r_heat_maps_zscores &&
r_heat_maps_homology &&
py_gtf &&
r_compile_gtf
# Run each submodule
for s in "${submodules[@]}"; do "$s"; done
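# (submodules holds function names that are invoked indirectly here, which is why
# SC2317 is disabled at the top of the script)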

popd || return 1
}


# @section Submodules
# @description Submodules provide functionality to modules and are reusable between modules
# Use a submodule when:
# * Calling external scripts
# * Performing repetitive tasks
# *
#
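# Each submodule below announces itself with debug, runs its external script from the
# REMc working directory entered by qhtcp(), and most record their expected output file in out_file.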

submodule r_join_interact
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
r_join_interact() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
echo "Rscript JoinInteractExps3dev.R"
Rscript JoinInteractExps3dev.R
out_file="REMcRdy_lm_only.csv"
@@ -230,10 +393,11 @@ r_join_interact() {
}


submodule java_extract
# @description Jingyu's REMc java utility using input file REMcRdy_lm_only.csv
# and output REMcRdy_lm_only.csv-finalTable.csv
java_jingyu_extract() {
echo "Running: ${FUNCNAME[0]}"
java_extract() {
debug "Running: ${FUNCNAME[0]}"
classpath="jingyuJava_1_7_extractLib.jar"
out_file="REMcRdy_lm_only.csv-finalTable.csv"

@@ -253,10 +417,11 @@ java_jingyu_extract() {
}


submodule r_add_shift_values
# @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv
# and output "REMcWithShift.csv" for use with the REMc heat maps
r_add_shift_values() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
out_file="REMcHeatmaps/REMcWithShift.csv"
echo "Rscript AddShiftVals2.R"
Rscript AddShiftVals2.R
@@ -265,9 +430,10 @@ r_add_shift_values() {
}


submodule r_heat_maps_zscores
# @description Execute REMcHeatmaps_zscores.R
r_heat_maps_zscores() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"
echo "Rscript REMcHeatmaps_zscores.R"
Rscript REMcHeatmaps_zscores.R
@@ -278,9 +444,10 @@ r_heat_maps_zscores() {
}


submodule r_heat_maps_homology
# @description Execute REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R
r_heat_maps_homology() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
work_dir="REMcHeatmapsWithHomology"
source_file="REMcHeatmaps/REMcWithShift.csv"
target_file="$work_dir/REMcWithShift.csv"
@@ -307,9 +474,10 @@ r_heat_maps_homology() {
}


submodule py_gtf
# @description Perform GTF (GO TermFinder) analysis
py_gtf() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
process_dir="GTF/Process"
function_dir="GTF/Function"
component_dir="GTF/Component"
@@ -325,7 +493,7 @@ py_gtf() {
# @description Not sure what to name this
# @arg $1 string directory name
_process() {
echo "Running: ${FUNCNAME[0]}" "$@"
debug "Running: ${FUNCNAME[0]}" "$@"
pushd "$1" || return 1

shopt -s nullglob
@@ -350,6 +518,27 @@ py_gtf() {
popd || return 1
}



# @description Perl analyze submodule
# @arg $1 string "Set 1"
# @arg $@ string
pl_analyze() {
:
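# TODO: wrap analyze_v2.pl here (see the StudiesQHTCP note above); arguments still to be determined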


}


pl_terms2tsv() {
:
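# TODO: wrap terms2tsv_v4.pl here; arguments still to be determined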



}



# Perform operations in each directory
for d in "$process_dir" "$function_dir" "$component_dir"; do
set1="ORF_List_Without_DAmPs.txt"
@@ -361,141 +550,56 @@ py_gtf() {

# @description Compile GTF in R
r_compile_gtf() {
echo "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}"
echo "Rscript CompileGTF.R"
Rscript CompileGTF.R
}


# @description Installs dependencies for the workflow
install_dependencies() {
echo "Running: ${FUNCNAME[0]}"

# Install system-wide dependencies
echo "Installing system dependencies"
case "$(uname -s)" in
Linux*|CYGWIN*|MINGW*)
ask "Detected Linux platform, continue?" || return 1
echo "You may be prompted for your sudo password to install system packages"
if hash dnf &>/dev/null; then
sudo dnf install graphviz pandoc pdftk-java gd-devel
elif hash apt &>/dev/null; then
sudo apt install graphviz pandoc pdftk-java libgd-dev
fi
;;
Darwin*)
ask "Detected Mac platform, continue?" || return 1
export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh|bash
brew install graphiz
brew install gd
brew install pdftk-java
brew install pandoc
cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder
;;
*)
echo "Your system could not be detected, please install dependencies manually"
;;
esac

# Install perl CPAN modules
echo "Installing perl CPAN modules"
echo "cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder"
cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder

# Install R packages
echo "Installing R packages"
Rscript -e 'install.packages(c(\
"BiocManager", \
"ontologyIndex" \
"ggrepel" \
"tidyverse" \
"sos" \
"openxlsx" \
"ggplot2" \
"plyr" \
"extrafont" \
"gridExtra" \
"gplots" \
"stringr" \
"plotly" \
"ggthemes" \
"pandoc" \
"rmarkdown" \
), dep=TRUE, repos="https://cloud.r-project.org")'
Rscript -e 'BiocManager::install("org.Sc.sgd.db")'
}


# @internal
ask() {
declare response
(( YES )) && return 0
read -r -p "$* [y/N]: " response
[[ ${response,,} =~ ^(yes|y)$ ]]
}
# @internal
err() { echo "Error: $*" >&2; }
# @internal
ask_pn() {
declare -g PROJECT_NAME
read -r -p "Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): " PROJECT_NAME
}
# @internal
debug() { (( DEBUG )) && echo "Debug: $*"; }


# @description The main loop of script-run-workflow
# May eventually need to add git ops
# Passes on arguments
# Most variables in main() are user configurable or can be overridden by env
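# Example invocation (hypothetical project name; SCANS_DIR is one of the env overrides):
#   SCANS_DIR=/mnt/data/ExpJobs ./script-run-workflow \
#       --project "$(whoami)_$(date +%y_%m_%d)_MY_PROJECT" --include easy,qhtcp --yes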
main() {
echo "Running: ${FUNCNAME[0]}" "$@"
debug "Running: ${FUNCNAME[0]}" "$@"

# Where are we located?
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Set the automatic project directory prefix
PROJECT_PREFIX="$(whoami)_$(date +%y_%m_%d)"

# When adding a module, it also should be added to this list
ALL_MODULES=(
install_dependencies
init_job
easy
ezview
qhtcp
)
san() { [[ $1 =~ .+_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_.+ ]]; } # sanitizer regex for prefix

declare -a PROJECT_NAMES=() # this array will hold all of the projects to run
[[ $# -eq 1 ]] && PROJECT_NAMES+=("$1") # easy way to run on single dir
[[ $# -ge 2 ]] && parse_input "$@" # parse arguments with getopt
declare -ag PROJECTS=() # this array will hold all of the projects for this run

# Prompt user for the PROJECT_NAME if we still don't have one
if [[ ${#PROJECT_NAMES[@]} -eq 0 ]]; then # still allows for environment overrides
parse_input "$@" # parse arguments with getopt

# Prompt user for the PROJECT if we still don't have one
if [[ ${#PROJECTS[@]} -eq 0 ]]; then # still allows for environment overrides
ask_pn
PROJECT_NAMES+=("$PROJECT_NAME")
PROJECTS+=("$PROJECT")
fi

# Sanitize PROJECT_NAMES
# This regex should match PROJECT_PREFIX
san() { [[ $1 =~ .+_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_.+ ]]; }
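# e.g. "jdoe_24_06_01_MY_PROJECT" passes, "MY_PROJECT" does not (jdoe is a hypothetical username)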
for i in "${!PROJECT_NAMES[@]}"; do
if ! san "${PROJECT_NAME[i]}"; then
echo "Project name ${PROJECT_NAME[$i]} is invalid"

for i in "${!PROJECTS[@]}"; do
if ! san "${PROJECTS[i]}"; then
echo "Project name ${PROJECTS[i]} is invalid"
echo "Enter a replacement"
ask_pn
san "$PROJECT_NAME" || (echo "RTFM"; return 1)
PROJECT_NAME[i]="$PROJECT_NAME"
san "$PROJECT" || { echo "RTFM"; return 1; }
PROJECTS[i]="$PROJECT"
fi
done

SCANS_DIR="${SCANS_DIR:-"/mnt/data/ExpJobs"}" # TODO propose changing this to something else

# If we don't catch with getopt or env, run all
[[ ${#MODULES[@]} -eq 0 ]] && MODULES=("${ALL_MODULES[@]}")
if [[ ${#INCLUDE_MODULES[@]} -eq 0 ]]; then
MODULES=("${ALL_MODULES[@]}")
else
MODULES=("${INCLUDE_MODULES[@]}")
fi

# Exclude modules overrides include
# Exclude modules from --exclude
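# (space padding turns the regex test below into a whole-word match against the excluded module list)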
arr=()
for m in "${MODULES[@]}"; do
[[ " ${EXCLUDE_MODULES[*]} " =~ [[:space:]]${m}[[:space:]] ]] || arr+=("$m")
@@ -508,15 +612,15 @@ main() {
if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
echo "Module ${MODULES[i]} not in the module list"
echo "Available modules: ${ALL_MODULES[*]}"
read -r -p "Enter replacement name: " MODULE
read -r -p "Enter replacement module name: " MODULE
[[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || { echo "RTFM"; return 1; }
MODULES[i]="$MODULE"
fi
done

# Loop over projects
for PROJECT_NAME in "${PROJECT_NAMES[@]}"; do
SCAN_DIR="$SCANS_DIR/$PROJECT_NAME"
for PROJECT in "${PROJECTS[@]}"; do
SCAN_DIR="$SCANS_DIR/$PROJECT"

# Run selected modules
for m in "${MODULES[@]}"; do