diff --git a/workflow/script-run-workflow b/workflow/script-run-workflow
index 98fa4c2f..314668cf 100755
--- a/workflow/script-run-workflow
+++ b/workflow/script-run-workflow
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 # Copyright 2024 Bryan C. Roessler
 #
-# This is a code scratchpad for organizing the Hartman Lab Server workflow
+# This is a flexible yet opinionated analysis workflow for the Hartman Lab
 # It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
 #
 # Allow indirect functions
@@ -14,6 +14,7 @@
 # @option -p | --project= Include one or more projects in the analysis
 # @option -i | --include= Include one or more modules in the analysis (default: all modules)
 # @option -x | --exclude= Exclude one or more modules in the analysis
+# @option -m | --markdown Generate the shdoc markdown file for this program
 # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
 # @option -d | --debug Turn on extra debugging output
 # @option -h | --help Print help message and exit (overrides other options)
@@ -49,6 +50,8 @@ print_help() {
       If no --include is specified, all modules are run
     --exclude, -x MODULE
       See MODULES section below for list of modules to exclude
+    --markdown, -m
+      Generate the shdoc markdown file for this program
     --yes, -y, --auto
       Always answer yes to questions (non-interactive mode)
     --debug, -d
@@ -86,8 +89,8 @@ print_help() {
 parse_input() {
   debug "Running: ${FUNCNAME[0]}" "$@"
 
-  long_opts="project:,include:,exclude:,yes,auto,debug,help"
-  short_opts="+p:i:x:yhd"
+  long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
+  short_opts="+p:i:x:mydh"
 
   if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
     eval set -- "$input"
@@ -117,6 +120,9 @@ parse_input() {
             EXCLUDE_MODULES+=("$1")
           fi
           ;;
+        --markdown|-m)
+          documentation
+          ;;
         --yes|-y|--auto)
           declare -g YES=1
           ;;
@@ -176,8 +182,9 @@ debug() { (( DEBUG )) && echo "Debug: $*"; }
 # @section Modules
 # @description A module contains a cohesive set of actions/experiments to run on a project
 # Use a module when:
-# * Building a new type of analysis
-# * Combining submodules
+# * Building a new type of analysis from scratch
+# * Generating project directories
+# * Combining other modules and submodules
 #
 #
 
@@ -327,13 +334,13 @@ qhtcp() {
 
   # Our list of submodules (functions) to run for this module
   # Put these in the appropriate order of operations
-  submodules=(
+  modules=(
     r_join_interact
     java_extract
     r_add_shift_values
     r_heat_maps_zscores
     r_heat_maps_homology
-    py_gtf
+    gtf
     r_compile_gtf
   )
 
@@ -364,12 +371,31 @@ qhtcp() {
   pushd "$QHTCP_DIR/REMc" || return 1
 
   # Run each submodule
-  for s in "${submodules[@]}"; do "$s"; done
+  for s in "${modules[@]}"; do "$s"; done
 
   popd || return 1
 }
 
 
+module gtf
+# @section GTF
+# @description GTF module for QHTCP
+gtf() {
+  debug "Running: ${FUNCNAME[0]}"
+  process_dir="GTF/Process"
+  function_dir="GTF/Function"
+  component_dir="GTF/Component"
+
+  py_gtf "$process_dir" || return 1 # nothing to fan out if the python step failed
+
+  # Perform operations in each directory in parallel
+  for d in "$process_dir" "$function_dir" "$component_dir"; do
+    rsync -a "$process_dir/REMcRdy_lm_only" "$d"/
+    out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
+    pl_gtf "$d" "$out_file" & # parallelize
+  done
+  wait # barrier: later modules (r_compile_gtf) must not start before the background pl_gtf jobs finish
+}
 # @section Submodules
 # @description Submodules provide functionaility to modules and are reusable between modules
 # Use a submodule when:
@@ -474,52 +500,45 @@ r_heat_maps_homology() {
 
 
 submodule py_gtf
-# @description Perform GTF
+# @description Perform python portion of GTF
+# @arg $1 string Directory to process
 py_gtf() {
   debug "Running: ${FUNCNAME[0]}"
-  process_dir="GTF/Process"
-  function_dir="GTF/Function"
-  component_dir="GTF/Component"
 
   in_file="REMcRdy_lm_only.csv-finalTable.csv"
-  out_file="$process_dir/REMcRdy_lm_only/1-0-0-finaltable.csv"
-  echo "$PYTHON DconJG2.py $in_file $process_dir/"
-  "$PYTHON" DconJG2.py "$in_file" "$process_dir/"
+  out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
+  debug "$PYTHON DconJG2.py $in_file $1/"
+  "$PYTHON" DconJG2.py "$in_file" "$1/"
   [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
-  unset out_file
-  rsync -a "$process_dir/REMcRdy_lm_only" GTF/Function/
-  rsync -a "$process_dir/REMcRdy_lm_only" GTF/Component/
+}
 
-  # @description Not sure what to name this
-  # @arg $1 string directory name
-  _process() {
-    debug "Running: ${FUNCNAME[0]}" "$@"
-    pushd "$1" || return 1
-    shopt -s nullglob
-    set2=(REMcRdy_lm_only/*.txt)
-    shopt -u nullglob
+submodule pl_gtf
+# @description Perl module for GTF
+# @arg $1 string working directory
+# @arg $2 string output file
+pl_gtf() {
+  debug "Running: ${FUNCNAME[0]}" "$@"
+  set1="ORF_List_Without_DAmPs.txt"
+  pushd "$1" || return 1
 
-    for s in "${set2[@]}"; do
-      pl_analyze "$set1" "$s"
-      pl_terms2tsv "$s"
-      "$PERL" terms2tsv_v4.pl "$s.terms" > "$s.tsv"
-    done
+  shopt -s nullglob
+  set2=(REMcRdy_lm_only/*.txt)
+  shopt -u nullglob
 
-    # Concat the process ontology outputs from the /REMcReady_lm_only folder
-    echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $out_file"
-    echo "TODO: Concatenate_GTF_results.py should be translated to bash"
-    "$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$out_file"
-
-    [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
-    popd || return 1
-  }
-
-  # Perform operations in each directory
-  for d in "$process_dir" "$function_dir" "$component_dir"; do
-    set1="ORF_List_Without_DAmPs.txt"
-    out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
-    _process "$d" & # parallelize
+  for s in "${set2[@]}"; do
+    debug "pl_analyze $set1 $s"
+    pl_analyze "$set1" "$s"
+    debug "pl_terms2tsv $s"
+    pl_terms2tsv "$s"
   done
+
+  # Concat the process ontology outputs from the /REMcReady_lm_only folder
+  echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
+  echo "TODO: Concatenate_GTF_results.py should be translated to bash"
+  "$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
+
+  [[ -f $2 ]] || { echo "$2 does not exist"; popd; return 1; } # brace group: a return inside ( ) only exits the subshell
+  popd || return 1
 }
 
 
@@ -550,23 +569,7 @@ pl_terms2tsv() {
 }
 
 
-submodule documentation
-# @section Documentation
-# @description Generates markdown documentation from this script using shdoc
-documentation() {
-  debug "Running: ${FUNCNAME[0]}"
-
-  # Print markdown to stdout
-  shdoc < "$SCRIPT"
-
-  # Create markdown file
-  shdoc < "$SCRIPT" > documentation.md
-
-}
-
-
-
-
+submodule r_compile_gtf
 # @description Compile GTF in R
 r_compile_gtf() {
   debug "Running: ${FUNCNAME[0]}"
@@ -575,6 +578,18 @@ r_compile_gtf() {
 }
 
 
+submodule documentation
+# @section Documentation
+# @description Generates markdown documentation from this script using shdoc
+documentation() {
+  debug "Running: ${FUNCNAME[0]}"
+  # Print markdown to stdout
+  ((DEBUG)) && shdoc < "$SCRIPT"
+  # Create markdown file
+  shdoc < "$SCRIPT" > documentation.md
+}
+
+
 # @description The main loop of script-run-workflow
 # May eventually need to add git ops
 # Passes on arguments