Allow users to pass args to submodules

This commit is contained in:
2024-08-01 21:40:06 -04:00
parent 3e76a96c24
commit 72dd4e1d1c

View File

@@ -9,7 +9,7 @@
# @brief One script to rule them all (see: xkcd #927) # @brief One script to rule them all (see: xkcd #927)
# @description A flexible yet opinionated analysis framework for the Hartman Lab # @description A flexible yet opinionated analysis framework for the Hartman Lab
# There should be at least 4 subdirectories to organize Q-HTCP data and analysis. The parent directory is simply called 'Q-HTCP' and the 4 are subdirectories described below (Fig. 1): # There should be at least 4 subdirectories to organize Q-HTCP data and analysis. The parent directory is simply called 'Q-HTCP' and the 4 are subdirectories described below (Fig. 1):
# * **ExpJobs** # * **scans/**
# * This directory contains raw image data and image analysis results for the entire collection of Q-HTCP experiments. # * This directory contains raw image data and image analysis results for the entire collection of Q-HTCP experiments.
# * We recommend each subdirectory within 'ExpJobs' should represent a single Q-HTCP experiment and be named using the following convention (AB yyyy_mmdd_PerturbationsOfInterest): experimenter initials ('AB '), date ('yyyy_mmdd_'), and brief description ('drugs_medias'). # * We recommend each subdirectory within 'ExpJobs' should represent a single Q-HTCP experiment and be named using the following convention (AB yyyy_mmdd_PerturbationsOfInterest): experimenter initials ('AB '), date ('yyyy_mmdd_'), and brief description ('drugs_medias').
# * Each subdirectory contains the Raw Image Folders for that experiment (a series of N folders with successive integer labels 1 to N, each folder containing the time series of images for a single cell array). It also contains a user-supplied subfolder, which must be named 'MasterPlateFiles' and must contain two Excel files, one named 'DrugMedia_experimentdescription' and the other named 'MasterPlate_experimentdescription'. The bolded part of the file name including the underscore is required. The italicized part is an optional description. Generally the 'DrugMedia_' file merits description. # * Each subdirectory contains the Raw Image Folders for that experiment (a series of N folders with successive integer labels 1 to N, each folder containing the time series of images for a single cell array). It also contains a user-supplied subfolder, which must be named 'MasterPlateFiles' and must contain two Excel files, one named 'DrugMedia_experimentdescription' and the other named 'MasterPlate_experimentdescription'. The bolded part of the file name including the underscore is required. The italicized part is an optional description. Generally the 'DrugMedia_' file merits description.
@@ -18,7 +18,7 @@
# * Together they encapsulate and define the experimental design. # * Together they encapsulate and define the experimental design.
# * The QHTCPImageFolders and 'MasterPlateFiles' folder are the inputs for image analysis with EASY software. # * The QHTCPImageFolders and 'MasterPlateFiles' folder are the inputs for image analysis with EASY software.
# * As further described below, EASY will automatically generate a 'Results' directory (within the ExpJobs/'ExperimentJob' folder) with a name that consists of a system-generated timestamp and an optional short description provided by the user (Fig.2). The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY. Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder. # * As further described below, EASY will automatically generate a 'Results' directory (within the ExpJobs/'ExperimentJob' folder) with a name that consists of a system-generated timestamp and an optional short description provided by the user (Fig.2). The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY. Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder.
# * **EASY** # * **apps/easy/**
# * This directory contains the GUI-enabled MATLAB software to accomplish image analysis and growth curve fitting. # * This directory contains the GUI-enabled MATLAB software to accomplish image analysis and growth curve fitting.
# * EASY analyzes Q-HTCP image data within an 'ExperimentJob' folder (described above; each cell array has its own folder containing its entire time series of images). # * EASY analyzes Q-HTCP image data within an 'ExperimentJob' folder (described above; each cell array has its own folder containing its entire time series of images).
# * EASY analysis produces image quantification data and growth curve fitting results for each cell array; these results are subsequently assembled into a single file and labeled, using information contained in the 'MasterPlate_' and 'DrugMedia_' files in the 'MasterPlateFiles' subdirectory. # * EASY analysis produces image quantification data and growth curve fitting results for each cell array; these results are subsequently assembled into a single file and labeled, using information contained in the 'MasterPlate_' and 'DrugMedia_' files in the 'MasterPlateFiles' subdirectory.
@@ -26,13 +26,11 @@
# * The /EASY directory is simply where the latest EASY version resides (additional versions in development or legacy versions may also be stored there). # * The /EASY directory is simply where the latest EASY version resides (additional versions in development or legacy versions may also be stored there).
# * The raw data inputs and result outputs for EASY are kept in the 'ExpJobs' directory. # * The raw data inputs and result outputs for EASY are kept in the 'ExpJobs' directory.
# * EASY also outputs a '.mat' file that is stored in the 'matResults' folder and is named with the TimeStamp and user-provided name appended to the 'Results' folder name when 'New Experiment' is executed from the 'File' Dropdown menu in the EASY console. # * EASY also outputs a '.mat' file that is stored in the 'matResults' folder and is named with the TimeStamp and user-provided name appended to the 'Results' folder name when 'New Experiment' is executed from the 'File' Dropdown menu in the EASY console.
# * **EZview** # * **apps/ezview/**
# * This directory contains the GUI-enabled MATLAB software to conveniently and efficiently mine the raw cell array image data for a Q-HTCP experiment. # * This directory contains the GUI-enabled MATLAB software to conveniently and efficiently mine the raw cell array image data for a Q-HTCP experiment.
# * It takes the Results.m file (created by EASY software) as an input and permits the user to navigate through the raw image data and growth curve results for the experiment. # * It takes the Results.m file (created by EASY software) as an input and permits the user to navigate through the raw image data and growth curve results for the experiment.
# * The /EZview directory provides a place for storing the latest EZview version (as well as other EZview versions). # * The /EZview directory provides a place for storing the latest EZview version (as well as other EZview versions).
# * The /EZview provides a GUI for examining the EASY results as provided in the …/matResults/… .mat file. # * The /EZview provides a GUI for examining the EASY results as provided in the …/matResults/… .mat file.
# * **StudiesQHTCP**
# * This directory contains the GUI-enabled JAVA software composite (MATLAB, JAVA, R, Python, Perl, Shell) that takes growth curve results (created by EASY software) as an input and successively generates interaction Z-score results, which are used for graphing gene interactions, Clustering, Gene Ontology analysis, and other ways of interpreting and visualizing the experimental quality and outcomes. {The /StudiesQHTCP folder contains the ordered command line scripts that call sets of other scripts to perform data selection and adaptation from the extracted text results spreadsheet found in the /ExpJobs/experiment name/Results…/PrintResults/ folder. In particular the 'user customize interactionCode4experiment.R' file. It also contains a multitude of R generated plots based on the selected data and possible adaptation. All clustering and Gene ontology analysis are derived from the 'ZScores_Interaction.csv' file found in the/ZScores subdirectory.}
# * **Master Plates** # * **Master Plates**
# * This optional folder is a convenient place to store copies of the 'MasterPlate_' and a 'DrugMedia_' file templates, along with previously used files that may have been modified and could be reused or further modified to enable future analyses. # * This optional folder is a convenient place to store copies of the 'MasterPlate_' and a 'DrugMedia_' file templates, along with previously used files that may have been modified and could be reused or further modified to enable future analyses.
# * These two file types are required in the 'MasterPlateFiles' folder, which catalogs experimental information specific to individual Jobs in the ExpJobs folder, as described further below. # * These two file types are required in the 'MasterPlateFiles' folder, which catalogs experimental information specific to individual Jobs in the ExpJobs folder, as described further below.
@@ -52,9 +50,10 @@
# * Local vars have a higher likelihood of being lower case, global vars are UPPER # * Local vars have a higher likelihood of being lower case, global vars are UPPER
# #
# @option -p<value> | --project=<value> Include one or more projects in the analysis # @option -p<value> | --project=<value> Include one or more projects in the analysis
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules) # @option -m<value> | --module=<value> Include one or more modules in the analysis (default: all modules)
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis # @option -s<value> | --submodule=<value> <cmd> Pass arguments or commands to a submodule in the current project context
# @option -m | --markdown Generate the shdoc markdown file for this program # @option -n<value> | --nomodule=<value> Exclude one or more modules in the analysis
# @option --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode) # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
# @option -d | --debug Turn on extra debugging output # @option -d | --debug Turn on extra debugging output
# @option -h | --help Print help message and exit (overrides other options) # @option -h | --help Print help message and exit (overrides other options)
@@ -87,12 +86,15 @@ print_help() {
OPTIONS: OPTIONS:
--project, -p PROJECT --project, -p PROJECT
PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME
--include, -i MODULE --module, -i MODULE
See MODULES section below for list of available modules See MODULES section below for list of available modules
If no --include is specified, all modules are run If no --include is specified, all modules are run
--exclude, -x MODULE --submodule, -s SUBMODULE "[ARG1],[ARG2]..." (string of comma delimited arguments)
See MODULES section below for list of modules to exclude See SUBMODULES section below for list of available modules
--markdown, -m See documentation for submodule argument usage
--nomodule, -n MODULE
See MODULES and SUBMODULES section below for list of modules to exclude
--markdown
Generate the shdoc markdown README.md file for this program Generate the shdoc markdown README.md file for this program
--yes, -y, --auto --yes, -y, --auto
Always answer yes to questions (non-interactive mode) Always answer yes to questions (non-interactive mode)
@@ -137,8 +139,8 @@ print_help() {
parse_input() { parse_input() {
debug "Running: ${FUNCNAME[0]}" "$@" debug "Running: ${FUNCNAME[0]}" "$@"
long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help" long_opts="project:,module:,nomodule:,markdown,yes,auto,debug,help"
short_opts="+p:i:x:mydh" short_opts="+p:m:n:ydh"
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
eval set -- "$input" eval set -- "$input"
@@ -146,29 +148,23 @@ parse_input() {
case $1 in case $1 in
--project|-p) --project|-p)
shift shift
if [[ $1 == *','* ]] ; then # check for commas
IFS=',' read -ra PROJECTS <<< "$1" IFS=',' read -ra PROJECTS <<< "$1"
else
PROJECTS+=("$1")
fi
;; ;;
--include|-i) --module|-m)
shift shift
if [[ $1 == *','* ]] ; then # check for commas
IFS=',' read -ra INCLUDE_MODULES <<< "$1" IFS=',' read -ra INCLUDE_MODULES <<< "$1"
else
INCLUDE_MODULES+=("$1")
fi
;; ;;
--exclude|-x) --submodule|-s)
shift shift
if [[ $1 == *','* ]] ; then # check for commas IFS=',' read -ra SUBMODULES <<< "$1"
IFS=',' read -ra EXCLUDE_MODULES <<< "$1" shift
else IFS=',' read -ra SUBMODULES <<< "$1"
EXCLUDE_MODULES+=("$1")
fi
;; ;;
--markdown|-m) --nomodule|-n)
shift
EXCLUDE_MODULES+=("$1")
;;
--markdown)
documentation; exit 0 # TODO disable the exit after development documentation; exit 0 # TODO disable the exit after development
;; ;;
--yes|-y|--auto) --yes|-y|--auto)
@@ -210,15 +206,11 @@ submodule() {
ALL_SUBMODULES+=("$1") ALL_SUBMODULES+=("$1")
declare -gA "$1" declare -gA "$1"
} }
# This function will only work if users have an actual name registered on the server
# TODO for now just use username # @arg $1 string The question to ask
# user_initials() { # @exitcode 0 If yes
# user_record="$(getent passwd "$(whoami)")" # @exitcode 1 If no
# user_gecos_field="$(echo "$user_record" | cut -d ':' -f 5)" # @internal
# user_full_name="$(echo "$user_gecos_field" | cut -d ',' -f 1)"
# last="${user_full_name#* }"
# echo "${user_full_name:0:1}${last:0:1}"
# }
ask() { ask() {
declare response declare response
(( YES )) && return 0 (( YES )) && return 0
@@ -309,11 +301,6 @@ print_header() {
echo "Example: SCANS_DIR=/path/to/scans OUT_DIR=/path/to/out ./qhtcp-workflow" echo "Example: SCANS_DIR=/path/to/scans OUT_DIR=/path/to/out ./qhtcp-workflow"
echo "" echo ""
# Gather projects from SCANS_DIR
shopt -s nullglob
projects=("$SCANS_DIR"/*/)
shopt -u nullglob
echo "Available Modules:" echo "Available Modules:"
for i in "${!ALL_MODULES[@]}"; do for i in "${!ALL_MODULES[@]}"; do
printf "%d. %s\n" "$((i+1))" "${ALL_MODULES[i]}" printf "%d. %s\n" "$((i+1))" "${ALL_MODULES[i]}"
@@ -326,6 +313,10 @@ print_header() {
done done
echo "" echo ""
# Gather and list projects from SCANS_DIR
shopt -s nullglob
projects=("$SCANS_DIR"/*/)
shopt -u nullglob
echo "Available Projects:" echo "Available Projects:"
projects=("${projects[@]%/}") # strip comma first! projects=("${projects[@]%/}") # strip comma first!
projects=("${projects[@]##*/}") projects=("${projects[@]##*/}")
@@ -334,6 +325,7 @@ print_header() {
done done
echo "" echo ""
# Let user choose project(s)
if [[ -z ${PROJECTS[*]} ]]; then if [[ -z ${PROJECTS[*]} ]]; then
num=$((${#projects[@]})) num=$((${#projects[@]}))
echo "Enter project #'s to run (comma delimited)" echo "Enter project #'s to run (comma delimited)"
@@ -346,7 +338,8 @@ print_header() {
unset response arr i unset response arr i
fi fi
if [[ -z ${MODULES[*]} ]]; then
if [[ -z ${MODULES[*]} && -z ${INCLUDE_MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then
echo "Enter module #'s to run (by #, comma delimited)" echo "Enter module #'s to run (by #, comma delimited)"
((YES)) || read -r -p "Hit enter for default (ALL): " response ((YES)) || read -r -p "Hit enter for default (ALL): " response
if [[ -n $response ]]; then if [[ -n $response ]]; then
@@ -358,16 +351,21 @@ print_header() {
unset response arr i unset response arr i
fi fi
if [[ -z ${SUBMODULES[*]} ]]; then if [[ -z ${MODULES[*]} && -z ${INCLUDE_MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then
echo "Enter submodule #'s to run (by #, comma delimited):" while :; do
read -r -p "Hit enter for default (ALL): " response echo "Enter a submodule followed by its arguments as a case delimited string in quotes"
if [[ -n $response ]]; then echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\""
IFS=',' read -ra arr <<< "$response" read -r -p "Or hit Enter to continue: " response
for i in "${arr[@]}"; do [[ -z $response ]] && break
SUBMODULES+=("${ALL_SUBMODULES[$((i-1))]}") IFS=' ' read -ra arr <<< "$response"
done if [[ ! ${#arr[@]} -eq 2 ]]; then
err "Enter a submodule name followed by its arguments as a case delimited string in quotes"
err "The second argument is required and may be an empty string, \"\""
else
SUBMODULES+=("${arr[@]}")
fi fi
unset response arr i unset response arr i
done
fi fi
# cat <<-EOF # cat <<-EOF
@@ -1616,6 +1614,9 @@ main() {
fi fi
fi fi
# Make sure we are using the absolute path
SCANS_DIR=$(realpath -s "$SCANS_DIR")
# Find an output directory # Find an output directory
local out_heirarchy=("$(dirname "$SCANS_DIR")/out" "$SCRIPT_DIR/out" "/mnt/data/out") local out_heirarchy=("$(dirname "$SCANS_DIR")/out" "$SCRIPT_DIR/out" "/mnt/data/out")
for d in "${out_heirarchy[@]}"; do for d in "${out_heirarchy[@]}"; do
@@ -1637,6 +1638,9 @@ main() {
fi fi
fi fi
# Make sure we are using the absolute path
OUT_DIR=$(realpath -s "$OUT_DIR")
# Set the automatic project directory prefix # Set the automatic project directory prefix
PROJECT_PREFIX="${DATE}_${PROJECT_USER}" # reversed these so easier to sort and parse date PROJECT_PREFIX="${DATE}_${PROJECT_USER}" # reversed these so easier to sort and parse date
sanitize_pn() { [[ $1 =~ [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_.+_.+ ]]; } # sanitizer regex for prefix sanitize_pn() { [[ $1 =~ [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_.+_.+ ]]; } # sanitizer regex for prefix
@@ -1678,12 +1682,23 @@ main() {
# Sanitize MODULES # Sanitize MODULES
for i in "${!MODULES[@]}"; do for i in "${!MODULES[@]}"; do
if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
echo "Module $m not in the module list" echo "Module ${MODULES[$i]} not in the module list"
echo "Available modules: ${ALL_MODULES[*]}" echo "Available modules:"
read -r -p "Enter replacement module name: " MODULE printf "%s\n" "${ALL_MODULES[@]}"
! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || (echo "RTFM"; return 1) read -r -p "Enter replacement module name: " module
MODULES[i]="$MODULE" ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${module}[[:space:]] ]] || (echo "RTFM"; return 1)
MODULES[i]="$module"
fi fi
unset module
done
# Sanitize SUBMODULES
for i in "${!SUBMODULES[@]}"; do
if ! [[ " ${ALL_SUBMODULES[*]} " =~ [[:space:]]${SUBMODULES[i]}[[:space:]] ]]; then
echo "Submodule ${SUBMODULES[$i]} not in the module list, removing"
unset "SUBMODULES[i]" "SUBMODULES[$((i+1))]"
fi
continue 2 # skip the arguments string
done done
# Loop over projects # Loop over projects
@@ -1711,6 +1726,14 @@ main() {
for m in "${MODULES[@]}"; do for m in "${MODULES[@]}"; do
ask "Run $m module?" && "$m" ask "Run $m module?" && "$m"
done done
# Run selected submodules
for i in "${!SUBMODULES[@]}"; do
IFS=',' read -ra cmds <<< "${SUBMODULES[$((i+1))]}" # load the command args
ask "Run ${SUBMODULES[i]} submodule with args ${cmds[*]}?" &&
"${SUBMODULES[i]}" "${cmds[@]}"
continue 2 # skip the command string
done
done done
} }