Break up GTR into module and submodules

This commit is contained in:
2024-07-22 17:15:46 -04:00
parent 258fd070ef
commit be1e9869b3

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Copyright 2024 Bryan C. Roessler # Copyright 2024 Bryan C. Roessler
# #
# This is a code scratchpad for organizing the Hartman Lab Server workflow # This is a flexible yet opinionated analysis workflow for the Hartman Lab
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed # It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
# #
# Allow indirect functions # Allow indirect functions
@@ -14,6 +14,7 @@
# @option -p<value> | --project=<value> Include one or more projects in the analysis # @option -p<value> | --project=<value> Include one or more projects in the analysis
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules) # @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis # @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
# @option -m | --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode) # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
# @option -d | --debug Turn on extra debugging output # @option -d | --debug Turn on extra debugging output
# @option -h | --help Print help message and exit (overrides other options) # @option -h | --help Print help message and exit (overrides other options)
@@ -49,6 +50,8 @@ print_help() {
If no --include is specified, all modules are run If no --include is specified, all modules are run
--exclude, -x MODULE --exclude, -x MODULE
See MODULES section below for list of modules to exclude See MODULES section below for list of modules to exclude
--markdown, -m
Generate the shdoc markdown file for this program
--yes, -y, --auto --yes, -y, --auto
Always answer yes to questions (non-interactive mode) Always answer yes to questions (non-interactive mode)
--debug, -d --debug, -d
@@ -86,8 +89,8 @@ print_help() {
parse_input() { parse_input() {
debug "Running: ${FUNCNAME[0]}" "$@" debug "Running: ${FUNCNAME[0]}" "$@"
long_opts="project:,include:,exclude:,yes,auto,debug,help" long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
short_opts="+p:i:x:yhd" short_opts="+p:i:x:mydh"
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
eval set -- "$input" eval set -- "$input"
@@ -117,6 +120,9 @@ parse_input() {
EXCLUDE_MODULES+=("$1") EXCLUDE_MODULES+=("$1")
fi fi
;; ;;
--markdown|-m)
documentation
;;
--yes|-y|--auto) --yes|-y|--auto)
declare -g YES=1 declare -g YES=1
;; ;;
@@ -176,8 +182,9 @@ debug() { (( DEBUG )) && echo "Debug: $*"; }
# @section Modules # @section Modules
# @description A module contains a cohesive set of actions/experiments to run on a project # @description A module contains a cohesive set of actions/experiments to run on a project
# Use a module when: # Use a module when:
# * Building a new type of analysis # * Building a new type of analysis from scratch
# * Combining submodules # * Generates project directories
# * Can combine other modules and submodules
# #
# #
@@ -327,13 +334,13 @@ qhtcp() {
# Our list of submodules (functions) to run for this module # Our list of submodules (functions) to run for this module
# Put these in the appropriate order of operations # Put these in the appropriate order of operations
submodules=( modules=(
r_join_interact r_join_interact
java_extract java_extract
r_add_shift_values r_add_shift_values
r_heat_maps_zscores r_heat_maps_zscores
r_heat_maps_homology r_heat_maps_homology
py_gtf gtf
r_compile_gtf r_compile_gtf
) )
@@ -364,12 +371,31 @@ qhtcp() {
pushd "$QHTCP_DIR/REMc" || return 1 pushd "$QHTCP_DIR/REMc" || return 1
# Run each submodule # Run each submodule
for s in "${submodules[@]}"; do "$s"; done for s in "${modules[@]}"; do "$s"; done
popd || return 1 popd || return 1
} }
module gtf
# @section GTF
# @description GTF module for QHTCP
# Runs the python portion of GTF on the Process directory, copies the resulting
# REMcRdy_lm_only directory into the Function and Component directories, then
# runs pl_gtf on all three directories in parallel and waits for every job to
# finish before returning, so later modules never see partial results.
# @exitcode 0 If py_gtf, every rsync, and every pl_gtf job succeeded
# @exitcode 1 If py_gtf, an rsync, or any pl_gtf job failed
gtf() {
  debug "Running: ${FUNCNAME[0]}"
  local process_dir="GTF/Process"
  local function_dir="GTF/Function"
  local component_dir="GTF/Component"
  local d out_file pid
  local -a pids=()
  local status=0

  # Everything below consumes the python output, so stop early if it failed
  py_gtf "$process_dir" || return 1

  # Perform operations in each directory in parallel
  for d in "$process_dir" "$function_dir" "$component_dir"; do
    # The Process directory already holds its own output; only the other
    # directories need a copy of it
    if [[ $d != "$process_dir" ]]; then
      if ! rsync -a "$process_dir/REMcRdy_lm_only" "$d"/; then
        echo "Could not sync $process_dir/REMcRdy_lm_only to $d" >&2
        status=1
        continue
      fi
    fi
    out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
    pl_gtf "$d" "$out_file" & # parallelize
    pids+=("$!")
  done

  # Barrier: collect every background job and remember if any of them failed
  for pid in "${pids[@]}"; do
    wait "$pid" || status=1
  done

  return "$status"
}
# @section Submodules # @section Submodules
# @description Submodules provide functionality to modules and are reusable between modules # @description Submodules provide functionality to modules and are reusable between modules
# Use a submodule when: # Use a submodule when:
@@ -474,25 +500,25 @@ r_heat_maps_homology() {
submodule py_gtf submodule py_gtf
# @description Perform GTF # @description Perform python portion of GTF
# @arg $1 string Directory to process
py_gtf() { py_gtf() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
process_dir="GTF/Process"
function_dir="GTF/Function"
component_dir="GTF/Component"
in_file="REMcRdy_lm_only.csv-finalTable.csv" in_file="REMcRdy_lm_only.csv-finalTable.csv"
out_file="$process_dir/REMcRdy_lm_only/1-0-0-finaltable.csv" out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
echo "$PYTHON DconJG2.py $in_file $process_dir/" debug "$PYTHON DconJG2.py $in_file $1/"
"$PYTHON" DconJG2.py "$in_file" "$process_dir/" "$PYTHON" DconJG2.py "$in_file" "$1/"
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
unset out_file }
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Function/
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Component/
# @description Not sure what to name this
# @arg $1 string directory name submodule pl_gtf
_process() { # @description Perl module for GTF
# @arg $1 string working directory
# @arg $2 string output file
pl_gtf() {
debug "Running: ${FUNCNAME[0]}" "$@" debug "Running: ${FUNCNAME[0]}" "$@"
set1="ORF_List_Without_DAmPs.txt"
pushd "$1" || return 1 pushd "$1" || return 1
shopt -s nullglob shopt -s nullglob
@@ -500,28 +526,21 @@ py_gtf() {
shopt -u nullglob shopt -u nullglob
for s in "${set2[@]}"; do for s in "${set2[@]}"; do
debug "pl_analyze $set1 $s"
pl_analyze "$set1" "$s" pl_analyze "$set1" "$s"
debug "pl_terms2tsv $s"
pl_terms2tsv "$s" pl_terms2tsv "$s"
"$PERL" terms2tsv_v4.pl "$s.terms" > "$s.tsv"
done done
# Concat the process ontology outputs from the /REMcRdy_lm_only folder # Concat the process ontology outputs from the /REMcRdy_lm_only folder
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $out_file" echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
echo "TODO: Concatenate_GTF_results.py should be translated to bash" echo "TODO: Concatenate_GTF_results.py should be translated to bash"
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$out_file" "$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) [[ -f $2 ]] || (echo "$2 does not exist"; return 1)
popd || return 1 popd || return 1
} }
# Perform operations in each directory
for d in "$process_dir" "$function_dir" "$component_dir"; do
set1="ORF_List_Without_DAmPs.txt"
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
_process "$d" & # parallelize
done
}
submodule pl_analyze submodule pl_analyze
# @description Perl analyze submodule # @description Perl analyze submodule
@@ -550,23 +569,7 @@ pl_terms2tsv() {
} }
submodule documentation submodule r_compile_gtf
# @section Documentation
# @description Generates markdown documentation from this script using shdoc
documentation() {
debug "Running: ${FUNCNAME[0]}"
# Print markdown to stdout
shdoc < "$SCRIPT"
# Create markdown file
shdoc < "$SCRIPT" > documentation.md
}
# @description Compile GTF in R # @description Compile GTF in R
r_compile_gtf() { r_compile_gtf() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
@@ -575,6 +578,18 @@ r_compile_gtf() {
} }
submodule documentation
# @section Documentation
# @description Generates markdown documentation from this script using shdoc
documentation() {
  # Renders the shdoc annotations of the file named by $SCRIPT as markdown
  # and stores the result in documentation.md in the current directory.
  debug "Running: ${FUNCNAME[0]}"

  # In debug mode, also show the markdown on stdout
  if ((DEBUG)); then
    shdoc < "$SCRIPT"
  fi

  # Always (re)generate the markdown file
  shdoc < "$SCRIPT" > documentation.md
}
# @description The main loop of script-run-workflow # @description The main loop of script-run-workflow
# May eventually need to add git ops # May eventually need to add git ops
# Passes on arguments # Passes on arguments