Break up GTR into module and submodules
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# Copyright 2024 Bryan C. Roessler
|
# Copyright 2024 Bryan C. Roessler
|
||||||
#
|
#
|
||||||
# This is a code scratchpad for organizing the Hartman Lab Server workflow
|
# This is a flexible yet opinionated analysis workflow for the Hartman Lab
|
||||||
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
|
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
|
||||||
#
|
#
|
||||||
# Allow indirect functions
|
# Allow indirect functions
|
||||||
@@ -14,6 +14,7 @@
|
|||||||
# @option -p<value> | --project=<value> Include one or more projects in the analysis
|
# @option -p<value> | --project=<value> Include one or more projects in the analysis
|
||||||
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
|
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
|
||||||
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
|
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
|
||||||
|
# @option -m | --markdown Generate the shdoc markdown file for this program
|
||||||
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
||||||
# @option -d | --debug Turn on extra debugging output
|
# @option -d | --debug Turn on extra debugging output
|
||||||
# @option -h | --help Print help message and exit (overrides other options)
|
# @option -h | --help Print help message and exit (overrides other options)
|
||||||
@@ -49,6 +50,8 @@ print_help() {
|
|||||||
If no --include is specified, all modules are run
|
If no --include is specified, all modules are run
|
||||||
--exclude, -x MODULE
|
--exclude, -x MODULE
|
||||||
See MODULES section below for list of modules to exclude
|
See MODULES section below for list of modules to exclude
|
||||||
|
--markdown, -m
|
||||||
|
Generate the shdoc markdown file for this program
|
||||||
--yes, -y, --auto
|
--yes, -y, --auto
|
||||||
Always answer yes to questions (non-interactive mode)
|
Always answer yes to questions (non-interactive mode)
|
||||||
--debug, -d
|
--debug, -d
|
||||||
@@ -86,8 +89,8 @@ print_help() {
|
|||||||
parse_input() {
|
parse_input() {
|
||||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||||
|
|
||||||
long_opts="project:,include:,exclude:,yes,auto,debug,help"
|
long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
|
||||||
short_opts="+p:i:x:yhd"
|
short_opts="+p:i:x:mydh"
|
||||||
|
|
||||||
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
|
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
|
||||||
eval set -- "$input"
|
eval set -- "$input"
|
||||||
@@ -117,6 +120,9 @@ parse_input() {
|
|||||||
EXCLUDE_MODULES+=("$1")
|
EXCLUDE_MODULES+=("$1")
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
--markdown|-m)
|
||||||
|
documentation
|
||||||
|
;;
|
||||||
--yes|-y|--auto)
|
--yes|-y|--auto)
|
||||||
declare -g YES=1
|
declare -g YES=1
|
||||||
;;
|
;;
|
||||||
@@ -176,8 +182,9 @@ debug() { (( DEBUG )) && echo "Debug: $*"; }
|
|||||||
# @section Modules
|
# @section Modules
|
||||||
# @description A module contains a cohesive set of actions/experiments to run on a project
|
# @description A module contains a cohesive set of actions/experiments to run on a project
|
||||||
# Use a module when:
|
# Use a module when:
|
||||||
# * Building a new type of analysis
|
# * Building a new type of analysis from scratch
|
||||||
# * Combining submodules
|
# * Generating project directories
|
||||||
|
# * Combining other modules and submodules
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|
||||||
@@ -327,13 +334,13 @@ qhtcp() {
|
|||||||
|
|
||||||
# Our list of submodules (functions) to run for this module
|
# Our list of submodules (functions) to run for this module
|
||||||
# Put these in the appropriate order of operations
|
# Put these in the appropriate order of operations
|
||||||
submodules=(
|
modules=(
|
||||||
r_join_interact
|
r_join_interact
|
||||||
java_extract
|
java_extract
|
||||||
r_add_shift_values
|
r_add_shift_values
|
||||||
r_heat_maps_zscores
|
r_heat_maps_zscores
|
||||||
r_heat_maps_homology
|
r_heat_maps_homology
|
||||||
py_gtf
|
gtf
|
||||||
r_compile_gtf
|
r_compile_gtf
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -364,12 +371,31 @@ qhtcp() {
|
|||||||
pushd "$QHTCP_DIR/REMc" || return 1
|
pushd "$QHTCP_DIR/REMc" || return 1
|
||||||
|
|
||||||
# Run each submodule
|
# Run each submodule
|
||||||
for s in "${submodules[@]}"; do "$s"; done
|
for s in "${modules[@]}"; do "$s"; done
|
||||||
|
|
||||||
popd || return 1
|
popd || return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
module gtf
|
||||||
|
# @section GTF
|
||||||
|
# @description GTF module for QHTCP
|
||||||
|
gtf() {
|
||||||
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
process_dir="GTF/Process"
|
||||||
|
function_dir="GTF/Function"
|
||||||
|
component_dir="GTF/Component"
|
||||||
|
|
||||||
|
py_gtf "$process_dir"
|
||||||
|
|
||||||
|
# Perform operations in each directory in parallel
|
||||||
|
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
||||||
|
rsync -a "$process_dir/REMcRdy_lm_only" "$d"/
|
||||||
|
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
|
||||||
|
pl_gtf "$d" "$out_file" & # parallelize
|
||||||
|
done
|
||||||
|
|
||||||
|
}
|
||||||
# @section Submodules
|
# @section Submodules
|
||||||
# @description Submodules provide functionality to modules and are reusable between modules
|
# @description Submodules provide functionality to modules and are reusable between modules
|
||||||
# Use a submodule when:
|
# Use a submodule when:
|
||||||
@@ -474,25 +500,25 @@ r_heat_maps_homology() {
|
|||||||
|
|
||||||
|
|
||||||
submodule py_gtf
|
submodule py_gtf
|
||||||
# @description Perform GTF
|
# @description Perform python portion of GTF
|
||||||
|
# @arg $1 string Directory to process
|
||||||
py_gtf() {
|
py_gtf() {
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
process_dir="GTF/Process"
|
|
||||||
function_dir="GTF/Function"
|
|
||||||
component_dir="GTF/Component"
|
|
||||||
in_file="REMcRdy_lm_only.csv-finalTable.csv"
|
in_file="REMcRdy_lm_only.csv-finalTable.csv"
|
||||||
out_file="$process_dir/REMcRdy_lm_only/1-0-0-finaltable.csv"
|
out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
|
||||||
echo "$PYTHON DconJG2.py $in_file $process_dir/"
|
debug "$PYTHON DconJG2.py $in_file $1/"
|
||||||
"$PYTHON" DconJG2.py "$in_file" "$process_dir/"
|
"$PYTHON" DconJG2.py "$in_file" "$1/"
|
||||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||||
unset out_file
|
}
|
||||||
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Function/
|
|
||||||
rsync -a "$process_dir/REMcRdy_lm_only" GTF/Component/
|
|
||||||
|
|
||||||
# @description Not sure what to name this
|
|
||||||
# @arg $1 string directory name
|
submodule pl_gtf
|
||||||
_process() {
|
# @description Perl module for GTF
|
||||||
|
# @arg $1 string working directory
|
||||||
|
# @arg $2 string output file
|
||||||
|
pl_gtf() {
|
||||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||||
|
set1="ORF_List_Without_DAmPs.txt"
|
||||||
pushd "$1" || return 1
|
pushd "$1" || return 1
|
||||||
|
|
||||||
shopt -s nullglob
|
shopt -s nullglob
|
||||||
@@ -500,26 +526,19 @@ py_gtf() {
|
|||||||
shopt -u nullglob
|
shopt -u nullglob
|
||||||
|
|
||||||
for s in "${set2[@]}"; do
|
for s in "${set2[@]}"; do
|
||||||
|
debug "pl_analyze $set1 $s"
|
||||||
pl_analyze "$set1" "$s"
|
pl_analyze "$set1" "$s"
|
||||||
|
debug "pl_terms2tsv $s"
|
||||||
pl_terms2tsv "$s"
|
pl_terms2tsv "$s"
|
||||||
"$PERL" terms2tsv_v4.pl "$s.terms" > "$s.tsv"
|
|
||||||
done
|
done
|
||||||
|
|
||||||
# Concatenate the process ontology outputs from the REMcRdy_lm_only/ folder
|
# Concatenate the process ontology outputs from the REMcRdy_lm_only/ folder
|
||||||
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $out_file"
|
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
|
||||||
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
|
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
|
||||||
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$out_file"
|
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
|
||||||
|
|
||||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
[[ -f $2 ]] || (echo "$2 does not exist"; return 1)
|
||||||
popd || return 1
|
popd || return 1
|
||||||
}
|
|
||||||
|
|
||||||
# Perform operations in each directory
|
|
||||||
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
|
||||||
set1="ORF_List_Without_DAmPs.txt"
|
|
||||||
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
|
|
||||||
_process "$d" & # parallelize
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -550,23 +569,7 @@ pl_terms2tsv() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
submodule documentation
|
submodule r_compile_gtf
|
||||||
# @section Documentation
|
|
||||||
# @description Generates markdown documentation from this script using shdoc
|
|
||||||
documentation() {
|
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
|
||||||
|
|
||||||
# Print markdown to stdout
|
|
||||||
shdoc < "$SCRIPT"
|
|
||||||
|
|
||||||
# Create markdown file
|
|
||||||
shdoc < "$SCRIPT" > documentation.md
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# @description Compile GTF in R
|
# @description Compile GTF in R
|
||||||
r_compile_gtf() {
|
r_compile_gtf() {
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
@@ -575,6 +578,18 @@ r_compile_gtf() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule documentation
|
||||||
|
# @section Documentation
|
||||||
|
# @description Generates markdown documentation from this script using shdoc
|
||||||
|
documentation() {
|
||||||
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
# Print markdown to stdout
|
||||||
|
((DEBUG)) && shdoc < "$SCRIPT"
|
||||||
|
# Create markdown file
|
||||||
|
shdoc < "$SCRIPT" > documentation.md
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# @description The main loop of script-run-workflow
|
# @description The main loop of script-run-workflow
|
||||||
# May eventually need to add git ops
|
# May eventually need to add git ops
|
||||||
# Passes on arguments
|
# Passes on arguments
|
||||||
|
|||||||
Reference in New Issue
Block a user