Break up GTR into module and submodules

This commit is contained in:
2024-07-22 17:15:46 -04:00
parent 258fd070ef
commit be1e9869b3

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env bash
# Copyright 2024 Bryan C. Roessler
#
# This is a code scratchpad for organizing the Hartman Lab Server workflow
# This is a flexible yet opinionated analysis workflow for the Hartman Lab
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
#
# Allow indirect functions
@@ -14,6 +14,7 @@
# @option -p<value> | --project=<value> Include one or more projects in the analysis
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
# @option -m | --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
# @option -d | --debug Turn on extra debugging output
# @option -h | --help Print help message and exit (overrides other options)
@@ -49,6 +50,8 @@ print_help() {
If no --include is specified, all modules are run
--exclude, -x MODULE
See MODULES section below for list of modules to exclude
--markdown, -m
Generate the shdoc markdown file for this program
--yes, -y, --auto
Always answer yes to questions (non-interactive mode)
--debug, -d
@@ -86,8 +89,8 @@ print_help() {
parse_input() {
debug "Running: ${FUNCNAME[0]}" "$@"
long_opts="project:,include:,exclude:,yes,auto,debug,help"
short_opts="+p:i:x:yhd"
long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
short_opts="+p:i:x:mydh"
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
eval set -- "$input"
@@ -117,6 +120,9 @@ parse_input() {
EXCLUDE_MODULES+=("$1")
fi
;;
--markdown|-m)
documentation
;;
--yes|-y|--auto)
declare -g YES=1
;;
@@ -176,8 +182,9 @@ debug() { (( DEBUG )) && echo "Debug: $*"; }
# @section Modules
# @description A module contains a cohesive set of actions/experiments to run on a project
# Use a module when:
# * Building a new type of analysis
# * Combining submodules
# * Building a new type of analysis from scratch
# * Generating project directories
# * Combining other modules and submodules
#
#
@@ -327,13 +334,13 @@ qhtcp() {
# Our list of submodules (functions) to run for this module
# Put these in the appropriate order of operations
submodules=(
modules=(
r_join_interact
java_extract
r_add_shift_values
r_heat_maps_zscores
r_heat_maps_homology
py_gtf
gtf
r_compile_gtf
)
@@ -364,12 +371,31 @@ qhtcp() {
pushd "$QHTCP_DIR/REMc" || return 1
# Run each submodule
for s in "${submodules[@]}"; do "$s"; done
for s in "${modules[@]}"; do "$s"; done
popd || return 1
}
module gtf
# @section GTF
# @description GTF module for QHTCP
# Runs py_gtf on the Process directory, then runs pl_gtf on the Process,
# Function and Component directories in parallel and waits for all of them
# to finish before returning.
# @noargs
# @exitcode 0 If py_gtf and every pl_gtf job succeeded
# @exitcode 1 If py_gtf or any pl_gtf job failed
gtf() {
  debug "Running: ${FUNCNAME[0]}"
  # NOTE(review): these three are deliberately left global, as in the original;
  # code outside this function may read them — confirm before making them local
  process_dir="GTF/Process"
  function_dir="GTF/Function"
  component_dir="GTF/Component"
  local d out_file pid rc=0
  local -a pids=()

  # The per-directory jobs below consume what py_gtf produces, so stop here on failure
  py_gtf "$process_dir" || return 1

  # Perform operations in each directory in parallel
  for d in "$process_dir" "$function_dir" "$component_dir"; do
    # Copying REMcRdy_lm_only from the Process directory onto itself is a no-op, so skip it
    if [[ $d != "$process_dir" ]]; then
      rsync -a "$process_dir/REMcRdy_lm_only" "$d"/
    fi
    out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
    pl_gtf "$d" "$out_file" & # parallelize
    pids+=("$!")
  done

  # Block until every background job is done so the caller's next step does not
  # start while results are still being written, and surface any job failure
  for pid in "${pids[@]}"; do
    wait "$pid" || rc=1
  done
  return "$rc"
}
# @section Submodules
# @description Submodules provide functionality to modules and are reusable between modules
# Use a submodule when:
@@ -474,25 +500,25 @@ r_heat_maps_homology() {
submodule py_gtf
# @description Perform GTF
# @description Perform python portion of GTF
# @arg $1 string Directory to process
py_gtf() {
# Trace the call when debugging is enabled
debug "Running: ${FUNCNAME[0]}"
# NOTE(review): this body appears to interleave two revisions of the function
# (hard-coded $process_dir lines next to the $1-based lines) — confirm which lines are live
process_dir="GTF/Process"
function_dir="GTF/Function"
component_dir="GTF/Component"
# Input table handed to DconJG2.py (relative path)
in_file="REMcRdy_lm_only.csv-finalTable.csv"
out_file="$process_dir/REMcRdy_lm_only/1-0-0-finaltable.csv"
echo "$PYTHON DconJG2.py $in_file $process_dir/"
"$PYTHON" DconJG2.py "$in_file" "$process_dir/"
# File checked for below, after DconJG2.py has been run against directory $1
out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
debug "$PYTHON DconJG2.py $in_file $1/"
"$PYTHON" DconJG2.py "$in_file" "$1/"
# NOTE(review): ( ... ) is a subshell, so `return 1` only leaves the subshell and
# execution continues with the next line — confirm whether an early return was intended
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
unset out_file
# Copy the REMcRdy_lm_only directory into GTF/Function/ and GTF/Component/
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Function/
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Component/
}
# @description Not sure what to name this
# @arg $1 string directory name
_process() {
submodule pl_gtf
# @description Perl module for GTF
# @arg $1 string working directory
# @arg $2 string output file
pl_gtf() {
debug "Running: ${FUNCNAME[0]}" "$@"
set1="ORF_List_Without_DAmPs.txt"
pushd "$1" || return 1
shopt -s nullglob
@@ -500,28 +526,21 @@ py_gtf() {
shopt -u nullglob
for s in "${set2[@]}"; do
debug "pl_analyze $set1 $s"
pl_analyze "$set1" "$s"
debug "pl_terms2tsv $s"
pl_terms2tsv "$s"
"$PERL" terms2tsv_v4.pl "$s.terms" > "$s.tsv"
done
# Concatenate the ontology outputs from the REMcRdy_lm_only/ folder
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $out_file"
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$out_file"
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
[[ -f $2 ]] || (echo "$2 does not exist"; return 1)
popd || return 1
}
# Perform operations in each directory
for d in "$process_dir" "$function_dir" "$component_dir"; do
set1="ORF_List_Without_DAmPs.txt"
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
_process "$d" & # parallelize
done
}
submodule pl_analyze
# @description Perl analyze submodule
@@ -550,23 +569,7 @@ pl_terms2tsv() {
}
submodule documentation
# @section Documentation
# @description Renders the shdoc annotations of this script as markdown,
# printing the result to stdout and saving it as documentation.md
documentation() {
  debug "Running: ${FUNCNAME[0]}"
  local script_path=$SCRIPT

  # First pass: show the markdown on stdout
  shdoc <"$script_path"

  # Second pass: save the markdown in the current directory
  shdoc <"$script_path" >documentation.md
}
submodule r_compile_gtf
# @description Compile GTF in R
r_compile_gtf() {
debug "Running: ${FUNCNAME[0]}"
@@ -575,6 +578,18 @@ r_compile_gtf() {
}
submodule documentation
# @section Documentation
# @description Renders the shdoc annotations of this script as markdown and
# saves the result as documentation.md (also printed to stdout in debug mode)
documentation() {
  debug "Running: ${FUNCNAME[0]}"

  # Only echo the markdown to stdout when debugging is switched on
  if ((DEBUG)); then
    shdoc <"$SCRIPT"
  fi

  # Always write the markdown file to the current directory
  shdoc <"$SCRIPT" >documentation.md
}
# @description The main loop of script-run-workflow
# May eventually need to add git ops
# Passes on arguments