Improve EASY result parsing to supersede portion of ExpFrontend.m

2024-07-23 20:23:56 -04:00
parent 9179846c04
commit b049d58e79
3 changed files with 293 additions and 194 deletions
--- a/workflow/script-run-workflow
+++ b/workflow/script-run-workflow
@@ -25,7 +25,7 @@
 # @option -h | --help Print help message and exit (overrides other options)
 DEBUG=1 # Turn debugging ON by default during development
-shopt -s extglob# Overview and Introduction to Directory Structure
+shopt -s extglob
 # @section Libraries
@@ -190,8 +190,10 @@ ask() {
 }
 # Write an error message to stderr, prefixed with "Error: ".
 # All arguments are joined into a single message line.
 err() {
   local msg="Error: $*"
   echo "$msg" >&2
 }
 ask_pn() {
   # Prompt for a project name and store the answer in the global PROJECT.
   # Exit status is 0 when the answer is empty and non-zero when a name was entered.
   local prompt="Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): "
   # Discard any previous value and make sure PROJECT is declared globally
   unset PROJECT
   declare -g PROJECT
   read -r -p "$prompt" PROJECT
   [[ ${#PROJECT} -eq 0 ]]
 }
 # Print a "Debug: "-prefixed message to stdout when DEBUG is non-zero.
 # The exit status is non-zero when DEBUG evaluates to 0 (nothing is printed).
 debug() {
   local msg="Debug: $*"
   (( DEBUG )) && echo "$msg"
 }
@@ -232,12 +234,7 @@ module install_dependencies
 # For R:
 # install.packages(“BiocManager”)
 # BiocManager::install(“org.Sc.sgd.db”)
-# install.packages(c(‘ontologyIndex’, 'ggrepel', 'tidyverse', 'sos', 'openxlsx'), dep=TRUE)
+# install.packages(c('ontologyIndex', 'ggrepel', 'tidyverse', 'sos', 'openxlsx'), dep=TRUE)
 install_dependencies() {
  debug "Running: ${FUNCNAME[0]}" "$@"
@@ -342,7 +339,6 @@ init_project() {
 module easy
 # @section EASY
 # @description Start an EASY analysis
 # Eliminated EstartConsole.m
 # TODO Don't create output in the scans folder, put it in an output directory
 # TODO The !!Results output files need standardized naming
 # TODO Don't perform directory operations in EASY
@@ -351,25 +347,35 @@ module easy
 # * The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY. 
 # * Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder.
 #
-# Directory Layout
+# Template:
-# EASY
+# templates/easy
-#   /figs
+#  * [datatipp.m](templates/easy/datatipp.m)
-#   /PTmats
+#  * [DgenResults.m](templates/easy/DgenResults.m)
-#   datatipp.m
+#  * [DMPexcel2mat.m](templates/easy/DMPexcel2mat.m)
-#   DgenNoGrowthResults200809.m
+#  * [EASYconsole.asv](templates/easy/EASYconsole.asv)
-#   DMPexcel2mat_2023winLinix.m
+#  * [EASYconsole.fig](templates/easy/EASYconsole.fig)
-#   EASYconsole.fig
+#  * [EASYconsole.m](templates/easy/EASYconsole.m)
-#   EASYconsole.m
+#  * [figs](templates/easy/figs)
-#   NCdisplayGui.m
+#    * [NPTdirect.fig](templates/easy/figs/NPTdirect.fig)
-#   NCfitImCFparforFailGbl2.m
+#    * [searchNPTIm.fig](templates/easy/figs/searchNPTIm.fig)
-#   NCscurImCF_3parfor.m
+#  * [NCdisplayGui.m](templates/easy/NCdisplayGui.m)
-#   NCsingleDisplay.m
+#  * [NCfitImCFparforFailGbl2.m](templates/easy/NCfitImCFparforFailGbl2.m)
-#   NIcircle.m
+#  * [NCscurImCF_3parfor.m](templates/easy/NCscurImCF_3parfor.m)
-#   NImParamRadiusGui.m
+#  * [NCsingleDisplay.m](templates/easy/NCsingleDisplay.m)
-#   NIscanIntensBGpar4GblFnc.m
+#  * [NIcircle.m](templates/easy/NIcircle.m)
-#   p4loop8c.m
+#  * [NImParamRadiusGui.m](templates/easy/NImParamRadiusGui.m)
-#   par4Gbl_Main8c.m
+#  * [NIscanIntensBGpar4GblFnc.m](templates/easy/NIscanIntensBGpar4GblFnc.m)
-#   par4GblFnc8c.m
+#  * [p4loop8c.m](templates/easy/p4loop8c.m)
 #  * [par4GblFnc8c.m](templates/easy/par4GblFnc8c.m)
 #  * [par4Gbl_Main8c.m](templates/easy/par4Gbl_Main8c.m)
 #  * [PTmats](templates/easy/PTmats)
 #      * [Nbdg.mat](templates/easy/PTmats/Nbdg.mat)
 #      * [NCFparms.mat](templates/easy/PTmats/NCFparms.mat)
 #      * [NImParameters.mat](templates/easy/PTmats/NImParameters.mat)
 #      * [NPTdirectParameters.mat](templates/easy/PTmats/NPTdirectParameters.mat)
 #      * [NPTmapDirect.mat](templates/easy/PTmats/NPTmapDirect.mat)
 #      * [NPTmapSearch.mat](templates/easy/PTmats/NPTmapSearch.mat)
 #      * [NPTsearchParameters.mat](templates/easy/PTmats/NPTsearchParameters.mat)
 #
 # To analyze a new Q-HTCP experiment:
 #   * Open the EASY Software.
@@ -407,35 +413,30 @@ module easy
 #
 # Issues:
 #    * We need full documentation for all of the current workflow. There are different documents that need to be integrated. This will need to be updated as we make improvements to the system.
-#    * MasterPlate_ file must have ydl227c in orf column, or else it Z_interaction.R will fail, because it can’t calculate shift values.
+#    * MasterPlate_ file must have ydl227c in orf column, or else Z_interaction.R will fail, because it can't calculate shift values.
-#    * Make sure there are no special characters; e.g., (), “, ‘, ?, etc.;  dash and underscore are ok as delimiters
+#    * Make sure there are no special characters; e.g., (), “, ', ?, etc.;  dash and underscore are ok as delimiters
-#    * Drug_Media_ file must have letter character to be read as ‘text’.
+#    * Drug_Media_ file must have letter character to be read as 'text'.
 #    * MasterPlate_ file and DrugMedia_ are .xlsx or .xls, but !!Results_ is .txt.
-#    * In Z_interactions.R, does it require a zero concentration/perturbation (should we use zero for the low conc, even if it’s not zero), e.g., in order to do the shift correctly.
+#    * In Z_interactions.R, does it require a zero concentration/perturbation (should we use zero for the low conc, even if it's not zero), e.g., in order to do the shift correctly.
 #    * Need to enable all file types (not only .xls) as the default for GenerateResults (to select MP and DM files as .xlsx).
 #    * Explore differences between the ELR and STD files - 24_0414; John R modified Z script to format ELR file for Z_interactions.R analysis.
 #    * To keep time stamps when transferring with FileZilla, go to the transfer drop down and turn it on, see https://filezillapro.com/docs/v3/advanced/preserve-timestamps/ 
-#    * Could we change the ‘MasterPlateFiles’ folder label in EASY to ‘MasterPlate_DrugMedia’ (since there should be only one MP and there is also a DM file required?
+#    * Could we change the 'MasterPlateFiles' folder label in EASY to 'MasterPlate_DrugMedia' (since there should be only one MP and there is also a DM file required?
-#    * I was also thinking of adding a ‘MasterPlateFilesOnly’ folder to the QHTCP directory template where one could house different MPFiles (e.g., with and without damps, with and without Refs on all MPs, etc; other custom MPFiles, updated versions, etc)
+#    * I was also thinking of adding a 'MasterPlateFilesOnly' folder to the QHTCP directory template where one could house different MPFiles (e.g., with and without damps, with and without Refs on all MPs, etc; other custom MPFiles, updated versions, etc)
-#    * Currently updated files are in ‘23_1011_NewUpdatedMasterPlate_Files’ on Mac (yeast strains/23_0914…/)
+#    * Currently updated files are in '23_1011_NewUpdatedMasterPlate_Files' on Mac (yeast strains/23_0914…/)
-#    * For EASY to report cell array positions (plate_row_column) to facilitate analyzing plate artifacts. The MP File in Col 3 is called ‘LibraryLocation’ and is reported after ‘Specifics’ in the !!Results.
+#    * For EASY to report cell array positions (plate_row_column) to facilitate analyzing plate artifacts. The MP File in Col 3 is called 'LibraryLocation' and is reported after 'Specifics' in the !!Results.
 #    * Can EASY/StudiesQ-HTCP be updated at any time by rerunning with updated MP file (new information for gene, desc, etc)- or maybe better to always start with a new template?
 #    * Need to be aware of file formatting to avoid dates (e.g., with gene names like MAY24, OCT1, etc, and with plate locations 1E1, 1E2, etc)- this has been less of a problem. 
 #    * In StudiesQHTCP folders, remember to annotate Exp1, Exp2, in the StudyInfo.csv file.
-#    * Where are gene names called from for labeling REMc heatmaps, TSHeatmaps, Z-interaction graphs, etc? Is this file in the QHTCP ‘code’ folder, or is it in the the results file (and thus ultimately the MP file)?
+#    * Where are gene names called from for labeling REMc heatmaps, TSHeatmaps, Z-interaction graphs, etc? Is this file in the QHTCP 'code' folder, or is it in the results file (and thus ultimately the MP file)?
 #    * Is it ok for a MasterPlate_ file to have multiple sheets (e.g., readme tab- is only the first tab read in)?
 #    * What are the rules for pulling information from the MasterPlateFile to the !!Results_ (e.g., is it the column or the Header Name, etc that is searched? Particular cells in the DrugMedia file?).
-#    * Modifier, Conc are from DM sheet, and refer to the agar media arrays. OrfRep is from MasterPlate_ File. ‘Specifics’ (Last Column) is experiment specific and accommodate designs involving differences across the multi-well liquid arrays. ‘StrainBkGrd’ (now ‘Library location’) is in the 3rd column and reported after ‘Specifics’ at the last col of the ‘!!Results..’ file.
+#    * Modifier, Conc are from DM sheet, and refer to the agar media arrays. OrfRep is from MasterPlate_ File. 'Specifics' (Last Column) is experiment specific and accommodate designs involving differences across the multi-well liquid arrays. 'StrainBkGrd' (now 'Library location') is in the 3rd column and reported after 'Specifics' at the last col of the '!!Results..' file.
 #    * Do we have / could we make an indicator- work in progress or idle/complete with MP/DM and after gen-report. Now, we can check for the MPDMmat.mat file, or we can look in PrintResults, but would be nice to know without looking there. 
-#    * File>>Load Experiment wasn’t working (no popup to redirect). Check this again.
+#    * File>>Load Experiment wasn't working (no popup to redirect). Check this again.
 easy() {
  debug "Running: ${FUNCNAME[0]}"
  #EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240430AppExported/EstartConsole.m"
  EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240505AppExported/EASYConsole.m"
  pushd "$SCAN_DIR" || return 1
  cat <<-EOF
 		To analyze a new Q-HTCP experiment:
 		  * Open the EASY Software.
@@ -473,31 +474,15 @@ easy() {
 		'NoGrowth_.txt', and 'GrowthOnly_.txt' files will be generated in the 'PrintResults' folder. 
 	EOF
  script="EASYConsole.m"
  pushd "$EASY_TEMPLATE_DIR" || return 1
  # Launch graphical matlab if the user wants
-  ! ((YES)) && ask "Start EASY in MATLAB? This requires a GUI." && matlab -nosplash -r "$EASY"
+  ! ((YES)) && ask "Start EASY in MATLAB? This requires a GUI." && matlab -nosplash -r "$script"
  popd || return 1
-  # glob EASY output and make sure it exists
+  # Use the function return code to see if we succeeded
-  # currently this is just for informative purposes of how to glob some of the EASY output files
+  get_easy_results "$SCAN_DIR" || return 1
  # The EASY output files need to be standardized
  shopt -s nullglob
  EASY_RESULTS_DIRS=( Results* )
  shopt -u nullglob
  [[ ${#EASY_RESULTS_DIRS} -ge 1 ]] || (echo "Missing EASY output"; exit 1)
  declare -a EASY_OUT_ARRAY
  for EASY_RESULTS_DIR in "${EASY_RESULTS_DIRS[@]}"; do
    [[ -d $EASY_RESULTS_DIR ]] && echo "Found EASY Results directory: $EASY_RESULTS_DIR"
    EASY_PRINT_RESULTS_DIR="$EASY_RESULTS_DIR/PrintResults"
    [[ -d $EASY_PRINT_RESULTS_DIR ]] && echo "Found EASY PrintResults directory: $EASY_PRINT_RESULTS_DIR"
    EASY_PRINT_RESULTS_FILES=(
      "$EASY_PRINT_RESULTS_DIR/!!"* 
      "$EASY_PRINT_RESULTS_DIR"/NoGrowth_*.txt
      "$EASY_PRINT_RESULTS_DIR"/GrowthOnly_*.txt
    )
    EASY_OUT_ARRAY+=("$EASY_RESULTS_DIR" "$EASY_PRINT_RESULTS_DIR" "${EASY_PRINT_RESULTS_FILES[@]}")
  done
  echo "EASY OUTPUT ARRAY: " "${EASY_OUT_ARRAY[@]}"
 }
@@ -515,14 +500,16 @@ module qhtcp
 # @section QHTCP
 # @description System for Multi-QHTCP-Experiment Gene Interaction Profiling Analysis
 # * Functional rewrite of REMcMaster3.sh
 # * Order the experiment names in the way you want them to appear in the REMc heatmaps
 # * Added a newline character to the end of StudyInfo.csv so it is a valid text file
 # TODO Suggest renaming StudiesQHTCP to something like qhtcp qhtcp_output or output
 # TODO Store StudyInfo somewhere better
 # TODO Move (hide) the study template somewhere else
 # TODO StudiesArchive should be smarter
 # Rerunning this module uses rsync --update to only copy files that are newer in the template
 # If you wish for the template to overwrite your changes, delete the file from your QHTCP project dir
 #
 # To create a new study (Experiment Specific Interaction Zscores generation)
 #
 # * StudyInfo.csv instructions:
 #   * In your files directory, open the /Code folder, edit the 'StudyInfo.csv' spreadsheet, and save it as a 'csv' file to give each experiment the labels you wish to be used for the plots and specific files. 
 #     * Enter the desired Experiment names- **order the names in the way you want them to appear in the REMc heatmaps; and make sure to run the front end programs (below) in the correct order (e.g., run front end in 'exp1' folder to call the !!Results file for the experiment you named as exp1 in the StudyInfo.csv file)   
@@ -540,14 +527,14 @@ module qhtcp
 #   * Do not double-click on the file from the directory. 
 #   * When prompted, navigate to the ExpJobs folder and the PrintResults folder within the correct job folder. 
 #   * Repeat this for every Exp# folder depending on how many experiments are being performed. 
-#   * Note: Before doing this, it’s a good idea to compare the ref and non-ref CPP average and median values. If they are not approximately equal, then may be helpful to standardize Ref values to the measures of central tendency of the Non-refs, because the Ref CPPs are used for the z-scores, which should be centered around zero.
+#   * Note: Before doing this, it's a good idea to compare the ref and non-ref CPP average and median values. If they are not approximately equal, then may be helpful to standardize Ref values to the measures of central tendency of the Non-refs, because the Ref CPPs are used for the z-scores, which should be centered around zero.
-#     * This script will copy the !!ResultsStd file (located in /PrintResults in the relevant job folder in /ExpJobs **rename this !!Results file before running front end; we normally use the ‘STD’ (not the ‘ELR’ file) chosen to the Exp# directory as can be seen in the “Current Folder” column in MATLAB, and it updates ‘StudiesDataArchive.txt’ file that resides in the /StudiesQHTCP folder. ‘StudiesDataArchive.txt’ is a log of file paths used for different studies, including timestamps.
+#     * This script will copy the !!ResultsStd file (located in /PrintResults in the relevant job folder in /ExpJobs **rename this !!Results file before running front end; we normally use the 'STD' (not the 'ELR' file) chosen to the Exp# directory as can be seen in the “Current Folder” column in MATLAB, and it updates 'StudiesDataArchive.txt' file that resides in the /StudiesQHTCP folder. 'StudiesDataArchive.txt' is a log of file paths used for different studies, including timestamps.
 #
-# Do this to document the names, dates and paths of all the studies and experiment data used in each study. Note, one should only have a single ‘!!Results…’ file for each /Exp_ to prevent ambiguity and confusion. If you decide to use a new or different ‘!!Results…’ sheet from what was used in a previous “QHTCP Study”, remove the one not being used. NOTE:  if you copy a ‘!!Results…’ file in by hand, it will not be recorded in the ‘StudiesDataArchive.txt’ file and so will not be documented for future reference. If you use the ExpFrontend.m utility it will append the new source for the raw !!Results… to the ‘StudiesDataArchive.txt’ file.  
+# Do this to document the names, dates and paths of all the studies and experiment data used in each study. Note, one should only have a single '!!Results…' file for each /Exp_ to prevent ambiguity and confusion. If you decide to use a new or different '!!Results…' sheet from what was used in a previous “QHTCP Study”, remove the one not being used. NOTE:  if you copy a '!!Results…' file in by hand, it will not be recorded in the 'StudiesDataArchive.txt' file and so will not be documented for future reference. If you use the ExpFrontend.m utility it will append the new source for the raw !!Results… to the 'StudiesDataArchive.txt' file.  
-# As stated above, it is advantageous to think about the comparisons one wishes to make so as to order the experiments in a rational way as it relates to the presentation of plots.  That is, which results from sheets and selected ‘interaction … .R’, user modified script, is used in /Exp1, Exp2, Exp3 and Exp4 as explained in the following section.
+# As stated above, it is advantageous to think about the comparisons one wishes to make so as to order the experiments in a rational way as it relates to the presentation of plots.  That is, which results from sheets and selected 'interaction … .R', user modified script, is used in /Exp1, Exp2, Exp3 and Exp4 as explained in the following section.
 # TODO MUST CLEAN UP QHTCP TEMPLATE DIRECTORY
 #
-# Code/Directory Structure:
+# Template:
 # templates/qhtcp
 #  * [A_QHTCP Study Design and Notes](templates/qhtcp/A_QHTCP Study Design and Notes)
 #    * [A_Suggestions for system improvements.odt](templates/qhtcp/A_QHTCP Study Design and Notes/A_Suggestions for system improvements.odt)
@@ -851,7 +838,7 @@ module qhtcp
 # How-To Procedure: Execute a Multi-experiment Study
 #  * Consider the goals of the study and design a strategy of experiments to include in the study. 
 #  * Consider the quality of the experiment runs using EZview to see if there are systematic problems that are readily detectable.
-#    * In some cases, one may wish to design a ‘pilot’ study for discovery purposes. 
+#    * In some cases, one may wish to design a 'pilot' study for discovery purposes. 
 #    * There is no problem doing that, just take a template study, copy and rename it as XYZpilotStudy etc. 
 #    * However, careful examination of the experimental results using EZview will likely save time in the long run. 
 #    * One may be able to relatively quickly run the interaction Z scores (the main challenge there is the user creation of customized interaction… .R code. 
@@ -860,11 +847,11 @@ module qhtcp
 #
 qhtcp() {
  debug "Running: ${FUNCNAME[0]}"
-  QHTCP_TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
+
  STUDY_TEMPLATE_DIR="$QHTCP_TEMPLATE_DIR/ExpTemplate"
  OUTPUT_DIR="/mnt/data/StudiesQHTCP"
  STUDIES_ARCHIVE="$OUTPUT_DIR/StudiesDataArchive.txt"
  QHTCP_DIR="$OUTPUT_DIR/$PROJECT"
  STUDY_INFO="$QHTCP_DIR/Code/StudyInfo.csv"
  if [[ -d $QHTCP_DIR ]]; then
    echo "A project already exists at $QHTCP_DIR"
@@ -880,9 +867,8 @@ qhtcp() {
  fi
  # Print current studies
  STUDY_INFO="$QHTCP_DIR/Code/StudyInfo.csv"
  [[ -f $STUDY_INFO ]] && 
-  echo "Current studies from $STUDY_INFO" &&
+  echo "Current studies from $STUDY_INFO: " &&
  cat "$STUDY_INFO"
  # Ask user to edit STUDY_INFO
@@ -891,6 +877,7 @@ qhtcp() {
 			Give each experiment the labels you wish to be used for the plots and specific files. 
 			Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps
 		EOF
    read -r -p "Press enter to continue"
    nano "$STUDY_INFO"
  fi
@@ -899,35 +886,99 @@ qhtcp() {
  # Initialize missing dirs
  for s in "${STUDIES_NUM[@]}"; do
-    study_dir="$QHTCP_DIR/Exp$s"
+    STUDY_DIR="$QHTCP_DIR/Exp$s"
-    if ! [[ -d $study_dir ]]; then
+    if ! [[ -d $STUDY_DIR ]]; then
-      if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$study_dir"; then
+      if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
-        err "Could not copy $STUDY_TEMPLATE_DIR template to $study_dir"
+        err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
        continue
      fi
    fi
  done
  unset STUDY_DIR
-  mat_exp_frontend
+  # Replacing ExpFrontend.m
  get_easy_results "$SCAN_DIR" || (err "No EASY results found in $SCAN_DIR"; return 1)
  # Set the right results directory
  # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring
  if ((YES)) || [[ ${#EASY_RESULTS_DIRS[@]} -eq 1 ]]; then
    # In automatic mode just choose the first OUT DIR in the list
    EASY_RESULT_DIR="${EASY_RESULTS_DIRS[0]}" # TODO right now just choose
  else
    echo "Multiple EASY results dirs found in $SCAN_DIR"
    echo "Here is a list: "
    for (( i=0; i<${#EASY_RESULTS_DIRS[@]}; i++ )); do
      printf "%d. %s\n" "$((i+1))" "${EASY_RESULTS_DIRS[i]}"
    done
    printf "%s\n" "${EASY_RESULTS_DIRS[@]}"
    read -r -p "Enter the item number to select: " response
    response=$(( response - 1 )) # bash arrays use zero indexing
    EASY_RESULTS_DIR="${EASY_RESULTS_DIRS[$response]}"
  fi
-
+  # TODO probably broken
-  for s in "${STUDIES_NUM[@]}"; do
+  EASY_RESULTS_FILES=("$EASY_RESULTS_DIR/"*"/PrintResults/!!"*) 
-    study_dir="$QHTCP_DIR/Exp$s"
+  # Create studies archive file if missing
-    # Z_InteractionTemplate.R
+  if ! [[ -d $STUDIES_ARCHIVE ]]; then
-    r_interactions "$study_dir" "!!Results"* 
+    header=(StudyDate tStudyName StudyPath ExpNum ExpDate ExpPath ResultFile)
    printf "%s\t" "${header[@]}" > "$STUDIES_ARCHIVE"
  fi
  # TODO Add them all to StudiesDataArchive?
  for f in "${EASY_RESULTS_FILES[@]}"; do
    for s in "${STUDIES_NUM[@]}"; do
      # Trying to match old ExpFrontend
      printf "%s\t" \
        "${DATE//_/}" "$PROJECT" "$QHTCP_DIR" "Exp$s" \
        "$PROJECT_DATE" "$SCAN_DIR" "$EASY_RESULT_DIR" "${f##*/}" \
        >> "$STUDIES_ARCHIVE"
    done
  done
-    # Enter REMc directory to run the scripts there
+  # Run R interactions script on all studies
-    pushd "$QHTCP_DIR/REMc" || return 1
+  for s in "${STUDIES_NUM[@]}"; do
-
+    STUDY_DIR="$QHTCP_DIR/Exp$s"
-    # Run modules
+    pushd "$STUDY_DIR" || return 1
-    r_join_interact
+    r_interactions \
-    java_extract
+      "$STUDY_DIR"\
-    r_add_shift_values
+      "$STUDY_INFO"\
-    r_heat_maps_zscores
+      "/ZScores/" \
-    r_heat_maps_homology
+      "../Code/SGD_features.tab" \
      5
    popd || return 1
  done
  # Global modules
  remc
 }
 module remc
 # @section GTF
 # @description GTF module for QHTCP
 # TODO which components of remc can be parallelized?
 # The submodules in remc really like to be run from the REMc dir
 # so we pop in and out for now
 # NOTE the remc modules could use some love
 #   * Don't cd within scripts, it's confusing
 #   * Use arguments to pass configuration variables
 #     * This allows us to abstract the program away in script-run-workflow and treat it like a module
 remc() {
   # Run the REMc submodules in sequence from inside "$QHTCP_DIR/REMc".
   # The chain stops at the first submodule that fails.
   # Returns 1 when the REMc directory cannot be entered or left, or when any
   # submodule fails; returns 0 otherwise.
   debug "Running: ${FUNCNAME[0]}"
   local rc=0
   # Enter REMc directory to run the scripts there
   pushd "$QHTCP_DIR/REMc" || return 1
   # Run modules
   # If any modules fail the rest will not run, this is fundamental to module design
   # Remove trailing && to run regardless
   # TODO can this be
   r_join_interact &&
     java_extract &&
     r_add_shift_values &&
     r_heat_maps_zscores &&
     r_heat_maps_homology ||
     rc=1
   # Always leave the REMc directory, even after a failure. Chaining popd onto
   # the && list skipped it whenever a submodule failed, leaving the caller
   # inside REMc with a stale directory-stack entry.
   popd || return 1
   return "$rc"
 }
@@ -982,9 +1033,14 @@ gta() {
 submodule mat_exp_frontend
 # @description Run the ExpFrontend.m program
 # This submodule:
 #   * Pushes into the Study template directory (ExpTemplate)
 #   * Prompts the user to run ExpFrontend.m
 #   * Pops out
 # NOTES:
 #   * ExpFrontend.m should be or is being rewritten
 mat_exp_frontend() {
  debug "Running: ${FUNCNAME[0]}"
  # MATLAB stuff
  cat <<-EOF
 		ExpFrontend.m was made for recording into a spreadsheet 
 		('StudiesDataArchive.txt') the date and files used (i.e., directory paths to the
@@ -1005,11 +1061,12 @@ mat_exp_frontend() {
 		Repeat this for every Exp# folder depending on how many experiments are being performed. 
 		The Exp# folder must correspond to the StudyInfo.csv created above. 
 	EOF
  script="ExpFrontend.m"
  if ! ((YES)) && 
-  ask "Start MATLAB to run ExpFrontend.m? This requires a GUI."; then
+  ask "Start MATLAB to run $script? This requires a GUI."; then
-    matlab -nosplash
+    matlab -nosplash -r "$script"
  fi
  [[ -f $STUDIES_ARCHIVE ]] || (err "$STUDIES_ARCHIVE missing"; return 1)
 }
@@ -1020,51 +1077,48 @@ submodule r_interactions
 # @arg $1 string The current working directory
 r_interactions() {
  debug "Running: ${FUNCNAME[0]}"
  cat <<-EOF
-In each /Exp# folder, rename the Z_InteractionTemplate.R script according to the experiment focus
+		In each /Exp# folder, rename the Z_InteractionTemplate.R script according to the experiment focus
-Example:  Interaction, Experimenter Initials, Experiment Focus --> ‘int_RM_2PE.R’
+		Example:  Interaction, Experimenter Initials, Experiment Focus --> 'int_RM_2PE.R'
-5. Open the renamed interaction script, and edit each one beginning at the ‘++BEGIN USER DATA SELECTION++’
+		5. Open the renamed interaction script, and edit each one beginning at the '++BEGIN USER DATA SELECTION++'
-This is designed so that the data of interest for each experiment is appropriately selected from the !!Results…txt file
+		This is designed so that the data of interest for each experiment is appropriately selected from the !!Results…txt file
-The user can edit, step through, and test the R script without running through the whole routine by observing the resultant data table created in RStudio.
+		The user can edit, step through, and test the R script without running through the whole routine by observing the resultant data table created in RStudio.
-The Z_InteractionTemplate.R script has a collection of code lines that have been used for prior analyses (generally to select data from various !!Results…txt files), which may be commented out (if not relevant), reused as needed, and/or modified for a new study. These include lines associated with the removal of ‘dAmps’, specific concentrations, and items described in the ‘Specifics’ and ‘Media’, i.e., information specific to a particular experiment design. There are also code lines to replace gene names ‘OCT1/YKL134C’ /’MAY24/YPR153W’ and that get converted to date format in excel, by using only the ORF name and to remove data rows with ‘Blank’ listed; these lines of code convenient to reuse. Hopefully, these code lines can be used, commented out, or adapted to aid the user in modifying this section to the specific data requirements of the study. As a new user data filter code is developed for each ‘Study’ (and vetted), those lines can be added to the InteractionTemplate230119.R code in the /StudyTemplate folders to aid in future studies.
+		The Z_InteractionTemplate.R script has a collection of code lines that have been used for prior analyses (generally to select data from various !!Results…txt files), which may be commented out (if not relevant), reused as needed, and/or modified for a new study. These include lines associated with the removal of 'dAmps', specific concentrations, and items described in the 'Specifics' and 'Media', i.e., information specific to a particular experiment design. There are also code lines to replace gene names 'OCT1/YKL134C' /'MAY24/YPR153W' and that get converted to date format in excel, by using only the ORF name and to remove data rows with 'Blank' listed; these lines of code convenient to reuse. Hopefully, these code lines can be used, commented out, or adapted to aid the user in modifying this section to the specific data requirements of the study. As a new user data filter code is developed for each 'Study' (and vetted), those lines can be added to the InteractionTemplate230119.R code in the /StudyTemplate folders to aid in future studies.
-6. Open a terminal, navigate to each /Exp# folder, and execute the (customized) ‘Z_InteractionTemplate_…” script by using the command line below:
+		6. Open a terminal, navigate to each /Exp# folder, and execute the (customized) 'Z_InteractionTemplate_…' script by using the command line below:
 		Rscript RenamedInteractionTemplate.R \!\!Results… .txt
 		**need to change wording to choose SD of Delta_Background to exclude Data from analysis.
 		[1] "Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean"
 		Enter a Standard Deviation value to noise filter >>
 		[1] Enter Standard deviation value for removing data for cultures due to high background (e.g., contaminated cultures). Generally set this very high (e.g., '20') on the first run in order NOT to remove data, e.g. '20'. Review QC data and inspect raw image data to decide if it is desirable to remove data, and then rerun analysis. 
 		Enter a Background SD threshold for EXCLUDING culture data from further analysis:
-Rscript RenamedInteractionTemplate.R \!\!Results… .txt
+		The script will request for the user to input a 'Background Standard Deviation Value'.  This Background value removes data where there is high pixel intensity in the background regions of a spot culture (i.e., suspected contamination). 5 is a minimum recommended value, because lower values result in more data being removed, and often times this is undesirable if contamination occurs late after the carrying capacity of the yeast culture is reached. This is most often “trial and error”, meaning there is a 'Frequency_Delta_Background.pdf' report in the /Exp_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice). In general, err on the high side, with BSD of 10 or 12…. One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value. Background values are reported in the results sheet and so could also be analyzed there.. 
-**need to change wording to choose SD of Delta_Background to exclude Data from analysis.
+			(For new terminal users, directory navigation tips are described below)
-[1] "Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean"
+		To navigate to the directory one can use the directory GUI (in X2Go, use the GUI to navigate to desired operating directory and then from the 'File' menu, choose “Open in Terminal')
-Enter a Standard Deviation value to noise filter >>
+		Alternatively, navigate there through the terminal window: 'pwd' “prints the current working directory”, 'ls' “lists” the subfolders in the current directory. 'cd'' followed by the name of the 'subdirectory' will move down into it. “cd .. “ changes to the parent directory
-
+		The tab key can be used to autofill unique characters after typing the initial letters of a folder or file you wish to call. 
 [1] Enter Standard deviation value for removing data for cultures due to high background (e.g., contaminated cultures). Generally set this very high (e.g., ‘20’) on the first run in order NOT to remove data, e.g. ‘20’. Review QC data and inspect raw image data to decide if it is desirable to remove data, and then rerun analysis. 
 Enter a Background SD threshold for EXCLUDING culture data from further analysis:
-The script will request for the user to input a ‘Background Standard Deviation Value’.  This Background value removes data where there is high pixel intensity in the background regions of a spot culture (i.e., suspected contamination). 5 is a minimum recommended value, because lower values result in more data being removed, and often times this is undesirable if contamination occurs late after the carrying capacity of the yeast culture is reached. This is most often “trial and error”, meaning there is a ‘Frequency_Delta_Background.pdf’ report in the /Exp_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice). In general, err on the high side, with BSD of 10 or 12…. One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value. Background values are reported in the results sheet and so could also be analyzed there.. 
+		The template structure above assists the user with organization and management of Q-HTCP files and provides a uniform directory structure to streamline reference across different users and experiments.
-
+		Since we are systematically comparing perturbations, most Q-HTCP studies will consist of either 2 or 4 experiment subfolders. 
-	(For new terminal users, directory navigation tips are described below)
+		The Zscores files are used for subsequent analyses, including REMc, GTA and Term Specific Heatmaps. These further analyses are described below and can be completed in any order and/or concurrently from separate terminals.
-To navigate to the directory one can use the directory GUI (in X2Go, use the GUI to navigate to desired operating directory and then from the ‘File’ menu, choose “Open in Terminal’)
+		**Annotate Files produced and comment out code that produces files that are obsolete or clutter.
 Alternatively, navigate there through the terminal window: ‘pwd’ “prints the current working directory”, ‘ls’ “lists” the subfolders in the current directory. ‘cd’’ followed by the name of the ‘subdirectory’ will move down into it. “cd .. “ changes to the parent directory
 The tab key can be used to autofill unique characters after typing the initial letters of a folder or file you wish to call. 
 The template structure above assists the user with organization and management of Q-HTCP files and provides a uniform directory structure to streamline reference across different users and experiments.
 Since we are systematically comparing perturbations, most Q-HTCP studies will consist of either 2 or 4 experiment subfolders. 
 The Zscores files are used for subsequent analyses, including REMc, GTA and Term Specific Heatmaps. These further analyses are described below and can be completed in any order and/or concurrently from separate terminals.
 **Annotate Files produced and comment out code that produces files that are obsolete or clutter.
 	EOF
-  script="$1/Z_InteractionTemplate.R"
+  script="Z_InteractionTemplate.R"
  debug "$RSCRIPT $script"
  "$RSCRIPT" "$script" 
  #   1. Path to input file
-#   2. /output/ directory
+  #   2. /output/ directory
-#   3. Path to StudyInfo.csv
+  #   3. Path to StudyInfo.csv
-#   4. Standard deviation value
+  #   4. Standard deviation value
 }
@@ -1262,15 +1316,20 @@ r_compile_gtf() {
  "$RSCRIPT" "$script"
 }
 submodule get_studies
 # @description Parse study names from StudyInfo.csv files
 # TODO: This whole submodule should eventually be either
 #    * Removed
-#    * Expanded into a file that stores all project/study settings
+#    * Expanded into a file that stores all project/study settings (database)
 # I had to add a newline to the end of StudyInfo.csv, may break things?
 # Example:
 #   ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
 #   1,ExpName1,NA,NA,UserInitials
 #   2,ExpName2,NA,NA,UserInitials
 #   3,ExpName3,NA,NA,UserInitials 
 # @arg $1 string File to read
 get_studies() {
  debug "Running: ${FUNCNAME[0]}"
  declare -ga STUDIES_NUM=()
  while IFS=',' read -r col1 _; do # split on comma, get second col
    STUDIES_NUM+=("$col1")
@@ -1278,6 +1337,40 @@ get_studies() {
 }
 submodule get_easy_results # lol
 # @description Gets info about EASY output in the project's scans directory
 # TODO: Standardize EASY output, it's hard to understand
 # @exitcode 0 if at least one results directory exists
 # @exitcode 1 if no results directories exist
 # @set EASY_RESULTS_DIRS array Globbed results directories from SCAN_DIR/
 # @set EASY_PRINT_RESULTS_FILES array Globbed PrintResults files for the last results directory processed
 # @arg $1 string Project scans (ExpJobs)
 get_easy_results() {
  # Locate EASY output under the scans directory passed as $1.
  #   Sets (global): EASY_RESULTS_DIRS        - every "$1"/Results*/ directory
  #                  EASY_PRINT_RESULTS_DIR   - PrintResults path of the last directory visited
  #                  EASY_PRINT_RESULTS_FILES - result files globbed from that PrintResults directory
  #   Returns 0 if at least one Results* directory exists, 1 otherwise.
  debug "Running: ${FUNCNAME[0]}"
  # Glob EASY output dirs
  # nullglob makes unmatched patterns expand to nothing instead of being kept
  # in the arrays as literal pattern strings
  shopt -s nullglob
  EASY_RESULTS_DIRS=( "$1"/Results*/ )
  # ${#array[@]} is the element count; ${#array} is only the string length of
  # the first element
  if [[ ${#EASY_RESULTS_DIRS[@]} -lt 1 ]]; then
    # Return from the function itself; a ( ...; return 1 ) subshell would only
    # exit the subshell and let the function carry on
    shopt -u nullglob
    echo "Missing EASY output"
    return 1
  fi
  # This section is mostly to provide some example globbing
  declare -a EASY_RESULTS_ARRAY=()
  for EASY_RESULTS_DIR in "${EASY_RESULTS_DIRS[@]}"; do
    [[ -d $EASY_RESULTS_DIR ]] && echo "Found EASY Results directory: $EASY_RESULTS_DIR"
    EASY_PRINT_RESULTS_DIR="$EASY_RESULTS_DIR/PrintResults"
    [[ -d $EASY_PRINT_RESULTS_DIR ]] && echo "Found EASY PrintResults directory: $EASY_PRINT_RESULTS_DIR"
    # nullglob is still enabled here, so only files that really exist are stored
    EASY_PRINT_RESULTS_FILES=(
      "$EASY_PRINT_RESULTS_DIR/!!"*
      "$EASY_PRINT_RESULTS_DIR"/NoGrowth_*.txt
      "$EASY_PRINT_RESULTS_DIR"/GrowthOnly_*.txt
    )
    EASY_RESULTS_ARRAY+=("$EASY_RESULTS_DIR" "$EASY_PRINT_RESULTS_DIR" "${EASY_PRINT_RESULTS_FILES[@]}")
  done
  # Restore default globbing only after every glob in this function has run
  shopt -u nullglob
  [[ ${#EASY_RESULTS_DIRS[@]} -gt 0 ]] # TODO may need to change this to something more specific
 }
 submodule documentation
 # @section Documentation
 # @description Generates markdown documentation from this script using shdoc
@@ -1302,8 +1395,15 @@ main() {
  SCRIPT=$(realpath -s "${BASH_SOURCE[0]}")
  SCRIPT_DIR=$(dirname "$SCRIPT")
  # Templates
  QHTCP_TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
  STUDY_TEMPLATE_DIR="$QHTCP_TEMPLATE_DIR/ExpTemplate"
  EASY_TEMPLATE_DIR="$SCRIPT_DIR/templates/easy"
  DATE="$(date +%y_%m%d)"
  # Set the automatic project directory prefix
-  PROJECT_PREFIX="$(whoami)_$(date +%y_%m_%d)"
+  PROJECT_PREFIX="${DATE}_$(whoami)_" # reversed these so easier to sort and parse date
  san() { # sanitizer regex for prefix
    # Succeeds when $1 carries a recognised project prefix. Accepts the current
    # layout built by PROJECT_PREFIX (YY_MMDD_user_name) as well as the legacy
    # layout (user_YY_MM_DD_name) so existing project directories still pass.
    [[ $1 =~ ^[0-9][0-9]_[0-9][0-9][0-9][0-9]_.+_.+ ||
       $1 =~ .+_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_.+ ]]
  }
  declare -ag PROJECTS=() # this array will hold all of the projects for this run
@@ -1312,8 +1412,12 @@ main() {
  # Prompt user for the PROJECT if we still don't have one
  if [[ ${#PROJECTS[@]} -eq 0 ]]; then # still allows for environment overrides
-    ask_pn
+    echo "You will be prompted to enter a project name"
-    PROJECTS+=("$PROJECT")
+    echo "Hit enter when done"
    while :; do
      ask_pn || break
      PROJECTS+=("$PROJECT")
    done
  fi
  for i in "${!PROJECTS[@]}"; do
@@ -1357,13 +1461,11 @@ main() {
  # Loop over projects
  for PROJECT in "${PROJECTS[@]}"; do 
    SCAN_DIR="$SCANS_DIR/$PROJECT"
-    [[ -d $SCAN_DIR ]] || mkdir -p "$SCAN_DIR"
+    PROJECT_DATE=${PROJECT%"${PROJECT#??_????}"} # e.g. 24_0723
    pushd "$SCAN_DIR" || return 1
    # Run selected modules
    for m in "${MODULES[@]}"; do
      ask "Run $m" && "$m"
    done
    popd || return 1
  done
 }
--- a/workflow/templates/qhtcp/ExpTemplate/ExpFrontend.m
+++ b/workflow/templates/qhtcp/ExpTemplate/ExpFrontend.m
@@ -1,57 +1,55 @@
-%Frontend240417.m
+% FrontEnd utility to copy source result sheet into Exp_ folders of
-%augmented ExpFrontend to provide a quick text archive and a robust
+% StudiesQHTCP/StudyName/Exp1(2,3,4). This allows the automation of path
-%database ammenable archive
+% capture to the StudiesDataArchive.txt study log.
-%FrontEnd utility to copy source result sheet into Exp_ folders of
+% Select, copy and Capture Study Exp_ details to study log
 %StudiesQHTCP/StudyName/Exp1(2,3,4). This allows the automation of path
 %capture to the StudiesDataArchive.txt study log.
 %Select, copy and Capture Study Exp_ details to study log
 %Exp meta data collection 
-W=pwd;
+
 % Set path variables
 wCodeDir=pwd;
 Wstudy=fullfile('../', wCodeDir);
 studyDate=datetime('now');
 %Load results file meta data  into workspace
 try 
-ExpLabel= strcat('Exp',W(end))
+    ExpLabel= strcat('Exp',wCodeDir(end))
-questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex'));
+    questdlg('\fontsize{20} Select the !!Results File','File Selection','OK', struct('Default','OK','Interpreter','tex'));
-[resFile,resPath]= uigetfile('*.txt')
+    [resFile,resPath]= uigetfile('*.txt')
-copyfile((fullfile(resPath,resFile)),fullfile(W))
+    copyfile((fullfile(resPath,resFile)),fullfile(wCodeDir))
-resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match'))
+    resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match'))
-cd ..
+    cd ..
 Wstudy= pwd
 studyDate= datetime('now');
 S.sDate(1) = {studyDate};
 if ispc
    lastSep=max(strfind(Wstudy,'\'))
    studyName= Wstudy((lastSep+1):end)
 else
    lastSep=max(strfind(Wstudy,'/'))
    studyName= Wstudy((lastSep+1):end)
 end
-S.sDate(1)= {studyDate};
+    if ispc
-S.sName(1)= {studyName}
+        lastSep=max(strfind(Wstudy,'\'))
-S.sPath(1)= {Wstudy}
+        studyName=Wstudy((lastSep+1):end)
-S.ELabel(1)= {ExpLabel}
+    else
-S.EresDate(1)= {resDate}
+        lastSep=max(strfind(Wstudy,'/'))
-S.EresFile(1)= {resFile}
+        studyName=Wstudy((lastSep+1):end)
-S.EresPath(1)= {resPath}
+    end
-cd ..
+    % Build the study info array
    S.sDate(1)= {studyDate};
    S.sName(1)= {studyName}
    S.sPath(1)= {Wstudy}
    S.ELabel(1)= {ExpLabel}
    S.EresDate(1)= {resDate}
    S.EresFile(1)= {resFile}
    S.EresPath(1)= {resPath}
-fid = fopen('StudiesDataArchive.txt','a');  
+    cd ..
-fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
+
-fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1});
+    fid = fopen('StudiesDataArchive.txt','a');  
-fclose(fid);
+    fprintf(fid,'StudyDate\tStudyName\tStudyPath\tExpNum\tExpDate\tExpPath\tResultFile\n');
-fclose('all');
+    fprintf(fid, '%s\t %s\t %s\t %s\t %s\t %s\t %s \n',S.sDate{1},S.sName{1},S.sPath{1},S.ELabel{1},S.EresDate{1},S.EresPath{1},S.EresFile{1});
    fclose(fid);
    fclose('all');
 catch
-    cd(W)
+    cd(wCodeDir)
    disp('Error: Unable to Execute ExpFrontend.m')
 end
-cd(W)
+cd(wCodeDir)
 pwd
 %*************************************************************************************
 %*************************************************************************************
@@ -66,12 +64,12 @@ nowNumFNm= strcat((int2str(now)),'.mat') % from previous section incase the /Stu
 %capture the /StudiesQHTCP directory for storing log data
 cd ../..
 logPath= pwd  %is /.../Exp_
-cd(W)
+cd(wCodeDir)
 %Try to load an existing data set from previous Frontend calls
 try  %end ~ln121
  load(fullfile(logPath,'.studyLog.mat')) %load(fullfile('../../','studyLog.mat'))
 catch  %If no studyLog.mat found [Initial First Entry]
-    ExpLabel= strcat('Exp',W(end))
+    ExpLabel= strcat('Exp',wCodeDir(end))
    resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match'))
    cd .. %move up to current study folder from ../Exp_ folder; 
    %Study meta data collection
@@ -102,7 +100,7 @@ catch  %If no studyLog.mat found [Initial First Entry]
   S.EresFile(1)= {resFile}
   S.EresPath(1)= {resPath}
- cd(W)
+ cd(wCodeDir)
 logFiletxt= fullfile(logPath,'StudiesDataArchive4DB.txt') %relative .txt path 
 %Initialize the StudiesDataArchive.txt file with the First row of meta data
@@ -127,7 +125,7 @@ end  %end for try  for the First entry only
 %Begin routine for all Entries After the Initial data entry**************** 
-ExpLabel= strcat('Exp',W(end))
+ExpLabel= strcat('Exp',wCodeDir(end))
 resDate= char(regexp(resFile, '(\d\d\_\d\d\d\d)|( \d\d\_\d\d\d\d|\d\d\d\d\d\d)','match')) %Capture date from !!Results file
 cd .. %Move to the current study folder
@@ -203,12 +201,12 @@ end
 fclose('all');
 catch
-    cd(W) %Return to the location of the frontend.m code (/studyName/Exp_)
+    cd(wCodeDir) %Return to the location of the frontend.m code (/studyName/Exp_)
    fclose('all');
    clear S %clear data structure variable 'S.'
 end
-cd(W) %Return to the location of the frontend.m code (/studyName/Exp_)
+cd(wCodeDir) %Return to the location of the frontend.m code (/studyName/Exp_)
 clear all %clear workspace variables
--- a/workflow/templates/qhtcp/ExpTemplate/Z_InteractionTemplate.R
+++ b/workflow/templates/qhtcp/ExpTemplate/Z_InteractionTemplate.R
@@ -1,8 +1,8 @@
 #!/usr/bin/env R
 # Based on InteractionTemplate.R which is based on Sean Santos's Interaction_V5 script
 #
-# Updated 2024 Bryan C Roessler to improve file operations and portability
+# Updated 240723 Bryan C Roessler to improve file operations and portability
-# NOTE: The script now has 1 REQUIRED argument and 4 OPTIONAL arguments: 
+# NOTE: The script now has 4 additional OPTIONAL arguments: 
 #   1. Path to input file
 #   2. /output/ directory
 #   3. Path to StudyInfo.csv
@@ -80,7 +80,6 @@ expNumber <- as.numeric(sub("^.*?(\\d+)$", "\\1", getwd()))
 Labels[expNumber,3] <- delBGFactor
 write.csv(Labels,file=studyInfo,row.names = FALSE)
 ###############################################################################
 ################### BEGIN USER DATA SELECTION SECTION #########################
 ###############################################################################