diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index a9b3d12d..f35975df 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -9,7 +9,7 @@ # @brief One script to rule them all (see: xkcd #927) # @description A flexible yet opinionated analysis framework for the Hartman Lab # There should be at least 4 subdirectories to organize Q-HTCP data and analysis. The parent directory is simply called 'Q-HTCP' and the 4 subdirectories are described below (Fig. 1): -# * **ExpJobs** +# * **scans/** # * This directory contains raw image data and image analysis results for the entire collection of Q-HTCP experiments. # * We recommend each subdirectory within 'ExpJobs' should represent a single Q-HTCP experiment and be named using the following convention (AB yyyy_mmdd_PerturbationsOfInterest): experimenter initials ('AB '), date ('yyyy_mmdd_'), and brief description ('drugs_medias'). # * Each subdirectory contains the Raw Image Folders for that experiment (a series of N folders with successive integer labels 1 to N, each folder containing the time series of images for a single cell array). It also contains a user-supplied subfolder, which must be named 'MasterPlateFiles' and must contain two Excel files, one named 'DrugMedia_experimentdescription' and the other named 'MasterPlate_experimentdescription'. The bolded part of the file name including the underscore is required. The italicized part is optional description. Generally the 'DrugMedia_' file merits description. @@ -18,7 +18,7 @@ # * Together they encapsulate and define the experimental design. # * The QHTCPImageFolders and 'MasterPlateFiles' folder are the inputs for image analysis with EASY software. # * As further described below, EASY will automatically generate a 'Results' directory (within the ExpJobs/'ExperimentJob' folder) with a name that consists of a system-generated timestamp and an optional short description provided by the user (Fig. 2). 
The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY. Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder. -# * **EASY** +# * **apps/easy/** # * This directory contains the GUI-enabled MATLAB software to accomplish image analysis and growth curve fitting. # * EASY analyzes Q-HTCP image data within an 'ExperimentJob' folder (described above; each cell array has its own folder containing its entire time series of images). # * EASY analysis produces image quantification data and growth curve fitting results for each cell array; these results are subsequently assembled into a single file and labeled, using information contained in the 'MasterPlate_' and 'DrugMedia_' files in the 'MasterPlateFiles' subdirectory. @@ -26,13 +26,11 @@ # * The /EASY directory is simply where the latest EASY version resides (additional versions in development or legacy versions may also be stored there). # * The raw data inputs and result outputs for EASY are kept in the 'ExpJobs' directory. # * EASY also outputs a '.mat' file that is stored in the 'matResults' folder and is named with the TimeStamp and user-provided name appended to the 'Results' folder name when 'New Experiment' is executed from the 'File' Dropdown menu in the EASY console. -# * **EZview** +# * **apps/ezview/** # * This directory contains the GUI-enabled MATLAB software to conveniently and efficiently mine the raw cell array image data for a Q-HTCP experiment. # * It takes the Results.m file (created by EASY software) as an input and permits the user to navigate through the raw image data and growth curve results for the experiment. # * The /EZview provides a place for storing the latest EZview version (as well as other EZview versions). # * The /EZview provides a GUI for examining the EASY results as provided in the …/matResults/… .mat file. 
-# * **StudiesQHTCP** -# * This directory contains the GUI-enabled JAVA software composite (MATLAB, JAVA, R, Python, Perl, Shell) that takes growth curve results (created by EASY software) as an input and successively generates interaction Z-score results, which are used for graphing gene interactions, Clustering, Gene Ontology analysis, and other ways of interpreting and visualizing the experimental quality and outcomes. {The /StudiesQHTCP folder contains the ordered command line scripts that call sets of other scripts to perform data selection and adaptation from the extracted text results spreadsheet found in the /ExpJobs/experiment name/Results…/PrintResults/ folder. In particular the 'user customize interactionCode4experiment.R' file. It also contains a multitude of R generated plots based on the selected data and possible adaptation. All clustering and Gene Ontology analysis are derived from the 'ZScores_Interaction.csv' file found in the /ZScores subdirectory.} # * **Master Plates** # * This optional folder is a convenient place to store copies of the 'MasterPlate_' and 'DrugMedia_' file templates, along with previously used files that may have been modified and could be reused or further modified to enable future analyses. # * These two file types are required in the 'MasterPlateFiles' folder, which catalogs experimental information specific to individual Jobs in the ExpJobs folder, as described further below. 
@@ -52,9 +50,10 @@ # * Local vars have a higher likelihood of being lower case, global vars are UPPER # # @option -p | --project= Include one or more projects in the analysis -# @option -i | --include= Include one or more modules in the analysis (default: all modules) -# @option -x | --exclude= Exclude one or more modules in the analysis -# @option -m | --markdown Generate the shdoc markdown file for this program +# @option -m | --module= Include one or more modules in the analysis (default: all modules) +# @option -s | --submodule= Pass arguments or commands to a submodule in the current project context +# @option -n | --nomodule= Exclude one or more modules in the analysis +# @option --markdown Generate the shdoc markdown file for this program # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode) # @option -d | --debug Turn on extra debugging output # @option -h | --help Print help message and exit (overrides other options) @@ -87,12 +86,15 @@ print_help() { OPTIONS: --project, -p PROJECT PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME - --include, -i MODULE + --module, -i MODULE See MODULES section below for list of available modules If no --include is specified, all modules are run - --exclude, -x MODULE - See MODULES section below for list of modules to exclude - --markdown, -m + --submodule, -s SUBMODULE "[ARG1],[ARG2]..." 
(string of comma delimited arguments) + See SUBMODULES section below for list of available modules + See documentation for submodule argument usage + --nomodule, -n MODULE + See MODULES and SUBMODULES section below for list of modules to exclude + --markdown Generate the shdoc markdown README.md file for this program --yes, -y, --auto Always answer yes to questions (non-interactive mode) @@ -137,8 +139,8 @@ print_help() { parse_input() { debug "Running: ${FUNCNAME[0]}" "$@" - long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help" - short_opts="+p:i:x:mydh" + long_opts="project:,module:,nomodule:,markdown,yes,auto,debug,help" + short_opts="+p:m:n:ydh" if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then eval set -- "$input" @@ -146,29 +148,23 @@ parse_input() { case $1 in --project|-p) shift - if [[ $1 == *','* ]] ; then # check for commas - IFS=',' read -ra PROJECTS <<< "$1" - else - PROJECTS+=("$1") - fi + IFS=',' read -ra PROJECTS <<< "$1" ;; - --include|-i) + --module|-m) shift - if [[ $1 == *','* ]] ; then # check for commas - IFS=',' read -ra INCLUDE_MODULES <<< "$1" - else - INCLUDE_MODULES+=("$1") - fi + IFS=',' read -ra INCLUDE_MODULES <<< "$1" ;; - --exclude|-x) + --submodule|-s) shift - if [[ $1 == *','* ]] ; then # check for commas - IFS=',' read -ra EXCLUDE_MODULES <<< "$1" - else - EXCLUDE_MODULES+=("$1") - fi + IFS=',' read -ra SUBMODULES <<< "$1" + shift + IFS=',' read -ra SUBMODULES <<< "$1" ;; - --markdown|-m) + --nomodule|-n) + shift + EXCLUDE_MODULES+=("$1") + ;; + --markdown) documentation; exit 0 # TODO disable the exit after development ;; --yes|-y|--auto) @@ -210,15 +206,11 @@ submodule() { ALL_SUBMODULES+=("$1") declare -gA "$1" } -# This function will only work if users have an actual name registered on the server -# TODO for now just use username -# user_initials() { -# user_record="$(getent passwd "$(whoami)")" -# user_gecos_field="$(echo "$user_record" | cut -d ':' -f 5)" -# user_full_name="$(echo 
"$user_gecos_field" | cut -d ',' -f 1)" -# last="${user_full_name#* }" -# echo "${user_full_name:0:1}${last:0:1}" -# } + +# @arg $1 string The question to ask +# @exitcode 0 If yes +# @exitcode 1 If no +# @internal ask() { declare response (( YES )) && return 0 @@ -309,11 +301,6 @@ print_header() { echo "Example: SCANS_DIR=/path/to/scans OUT_DIR=/path/to/out ./qhtcp-workflow" echo "" - # Gather projects from SCANS_DIR - shopt -s nullglob - projects=("$SCANS_DIR"/*/) - shopt -u nullglob - echo "Available Modules:" for i in "${!ALL_MODULES[@]}"; do printf "%d. %s\n" "$((i+1))" "${ALL_MODULES[i]}" @@ -326,6 +313,10 @@ print_header() { done echo "" + # Gather and list projects from SCANS_DIR + shopt -s nullglob + projects=("$SCANS_DIR"/*/) + shopt -u nullglob echo "Available Projects:" projects=("${projects[@]%/}") # strip comma first! projects=("${projects[@]##*/}") @@ -334,6 +325,7 @@ print_header() { done echo "" + # Let user choose project(s) if [[ -z ${PROJECTS[*]} ]]; then num=$((${#projects[@]})) echo "Enter project #'s to run (comma delimited)" @@ -346,7 +338,8 @@ print_header() { unset response arr i fi - if [[ -z ${MODULES[*]} ]]; then + + if [[ -z ${MODULES[*]} && -z ${INCLUDE_MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then echo "Enter module #'s to run (by #, comma delimited)" ((YES)) || read -r -p "Hit enter for default (ALL): " response if [[ -n $response ]]; then @@ -358,18 +351,23 @@ print_header() { unset response arr i fi - if [[ -z ${SUBMODULES[*]} ]]; then - echo "Enter submodule #'s to run (by #, comma delimited):" - read -r -p "Hit enter for default (ALL): " response - if [[ -n $response ]]; then - IFS=',' read -ra arr <<< "$response" - for i in "${arr[@]}"; do - SUBMODULES+=("${ALL_SUBMODULES[$((i-1))]}") - done - fi - unset response arr i + if [[ -z ${MODULES[*]} && -z ${INCLUDE_MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then + while :; do + echo "Enter a submodule followed by its arguments as a case delimited string in 
quotes" + echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\"" + read -r -p "Or hit Enter to continue: " response + [[ -z $response ]] && break + IFS=' ' read -ra arr <<< "$response" + if [[ ! ${#arr[@]} -eq 2 ]]; then + err "Enter a submodule name followed by its arguments as a case delimited string in quotes" + err "The second argument is required and may be an empty string, \"\"" + else + SUBMODULES+=("${arr[@]}") + fi + unset response arr i + done fi - + # cat <<-EOF # Available modules: ${ALL_MODULES[*]} # Available submodules: ${ALL_SUBMODULES[*]} @@ -1616,6 +1614,9 @@ main() { fi fi + # Make sure we are using the absolute path + SCANS_DIR=$(realpath -s "$SCANS_DIR") + # Find an output directory local out_heirarchy=("$(dirname "$SCANS_DIR")/out" "$SCRIPT_DIR/out" "/mnt/data/out") for d in "${out_heirarchy[@]}"; do @@ -1637,6 +1638,9 @@ main() { fi fi + # Make sure we are using the absolute path + OUT_DIR=$(realpath -s "$OUT_DIR") + # Set the automatic project directory prefix PROJECT_PREFIX="${DATE}_${PROJECT_USER}" # reversed these so easier to sort and parse date sanitize_pn() { [[ $1 =~ [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_.+_.+ ]]; } # sanitizer regex for prefix @@ -1678,12 +1682,23 @@ main() { # Sanitize MODULES for i in "${!MODULES[@]}"; do if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then - echo "Module $m not in the module list" - echo "Available modules: ${ALL_MODULES[*]}" - read -r -p "Enter replacement module name: " MODULE - ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || (echo "RTFM"; return 1) - MODULES[i]="$MODULE" + echo "Module ${MODULES[$i]} not in the module list" + echo "Available modules:" + printf "%s\n" "${ALL_MODULES[@]}" + read -r -p "Enter replacement module name: " module + ! 
[[ " ${ALL_MODULES[*]} " =~ [[:space:]]${module}[[:space:]] ]] || (echo "RTFM"; return 1) + MODULES[i]="$module" fi + unset module + done + + # Sanitize SUBMODULES + for i in "${!SUBMODULES[@]}"; do + if ! [[ " ${ALL_SUBMODULES[*]} " =~ [[:space:]]${SUBMODULES[i]}[[:space:]] ]]; then + echo "Submodule ${SUBMODULES[$i]} not in the module list, removing" + unset "SUBMODULES[i]" "SUBMODULES[$((i+1))]" + fi + continue 2 # skip the arguments string done # Loop over projects @@ -1711,6 +1726,14 @@ main() { for m in "${MODULES[@]}"; do ask "Run $m module?" && "$m" done + + # Run selected submodules + for i in "${!SUBMODULES[@]}"; do + IFS=',' read -ra cmds <<< "${SUBMODULES[$((i+1))]}" # load the command args + ask "Run ${SUBMODULES[i]} submodule with args ${cmds[*]}?" && + "${SUBMODULES[i]}" "${cmds[@]}" + continue 2 # skip the command string + done done }