Break up GTR into module and submodules
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2024 Bryan C. Roessler
|
||||
#
|
||||
# This is a code scratchpad for organizing the Hartman Lab Server workflow
|
||||
# This is a flexible yet opinionated analysis workflow for the Hartman Lab
|
||||
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
|
||||
#
|
||||
# Allow indirect functions
|
||||
@@ -14,6 +14,7 @@
|
||||
# @option -p<value> | --project=<value> Include one or more projects in the analysis
|
||||
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
|
||||
# @option -x<value> | --exclude=<value> Exclude one or more modules from the analysis
|
||||
# @option -m | --markdown Generate the shdoc markdown file for this program
|
||||
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
||||
# @option -d | --debug Turn on extra debugging output
|
||||
# @option -h | --help Print help message and exit (overrides other options)
|
||||
@@ -49,6 +50,8 @@ print_help() {
|
||||
If no --include is specified, all modules are run
|
||||
--exclude, -x MODULE
|
||||
See MODULES section below for list of modules to exclude
|
||||
--markdown, -m
|
||||
Generate the shdoc markdown file for this program
|
||||
--yes, -y, --auto
|
||||
Always answer yes to questions (non-interactive mode)
|
||||
--debug, -d
|
||||
@@ -86,8 +89,8 @@ print_help() {
|
||||
parse_input() {
|
||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||
|
||||
long_opts="project:,include:,exclude:,yes,auto,debug,help"
|
||||
short_opts="+p:i:x:yhd"
|
||||
long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
|
||||
short_opts="+p:i:x:mydh"
|
||||
|
||||
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
|
||||
eval set -- "$input"
|
||||
@@ -117,6 +120,9 @@ parse_input() {
|
||||
EXCLUDE_MODULES+=("$1")
|
||||
fi
|
||||
;;
|
||||
--markdown|-m)
|
||||
documentation
|
||||
;;
|
||||
--yes|-y|--auto)
|
||||
declare -g YES=1
|
||||
;;
|
||||
@@ -176,8 +182,9 @@ debug() { (( DEBUG )) && echo "Debug: $*"; }
|
||||
# @section Modules
|
||||
# @description A module contains a cohesive set of actions/experiments to run on a project
|
||||
# Use a module when:
|
||||
# * Building a new type of analysis
|
||||
# * Combining submodules
|
||||
# * Building a new type of analysis from scratch
|
||||
# * Generates project directories
|
||||
# * Can combine other modules and submodules
|
||||
#
|
||||
#
|
||||
|
||||
@@ -327,13 +334,13 @@ qhtcp() {
|
||||
|
||||
# Our list of submodules (functions) to run for this module
|
||||
# Put these in the appropriate order of operations
|
||||
submodules=(
|
||||
modules=(
|
||||
r_join_interact
|
||||
java_extract
|
||||
r_add_shift_values
|
||||
r_heat_maps_zscores
|
||||
r_heat_maps_homology
|
||||
py_gtf
|
||||
gtf
|
||||
r_compile_gtf
|
||||
)
|
||||
|
||||
@@ -364,12 +371,31 @@ qhtcp() {
|
||||
pushd "$QHTCP_DIR/REMc" || return 1
|
||||
|
||||
# Run each submodule
|
||||
for s in "${submodules[@]}"; do "$s"; done
|
||||
for s in "${modules[@]}"; do "$s"; done
|
||||
|
||||
popd || return 1
|
||||
}
|
||||
|
||||
|
||||
module gtf
|
||||
# @section GTF
# @description GTF module for QHTCP
# Runs py_gtf on GTF/Process, copies the REMcRdy_lm_only directory from there
# into each GTF directory, runs pl_gtf on every directory in parallel, and
# waits for all of those jobs before returning.
# @exitcode 0 If every step succeeded
# @exitcode 1 If py_gtf, an rsync copy, or any pl_gtf job failed
gtf() {
  debug "Running: ${FUNCNAME[0]}"
  # The directory names and out_file are intentionally left global, as before,
  # in case other functions read them
  process_dir="GTF/Process"
  function_dir="GTF/Function"
  component_dir="GTF/Component"
  local d pid rc=0
  local -a pids=()

  py_gtf "$process_dir" || return 1

  # Perform operations in each directory in parallel
  for d in "$process_dir" "$function_dir" "$component_dir"; do
    if ! rsync -a "$process_dir/REMcRdy_lm_only" "$d"/; then
      # Nothing to analyze in this directory; remember the failure and move on
      rc=1
      continue
    fi
    out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
    pl_gtf "$d" "$out_file" & # parallelize
    pids+=("$!")
  done

  # Block until every background pl_gtf has finished so that whatever runs
  # after this module never sees partial results, and surface any failure
  for pid in "${pids[@]}"; do
    wait "$pid" || rc=1
  done

  return "$rc"
}
|
||||
# @section Submodules
|
||||
# @description Submodules provide functionality to modules and are reusable between modules
|
||||
# Use a submodule when:
|
||||
@@ -474,52 +500,45 @@ r_heat_maps_homology() {
|
||||
|
||||
|
||||
submodule py_gtf
|
||||
# @description Perform python portion of GTF
# Runs DconJG2.py on REMcRdy_lm_only.csv-finalTable.csv with "$1/" as its
# second argument, then verifies that
# "$1/REMcRdy_lm_only/1-0-0-finaltable.csv" exists afterwards.
# @arg $1 string Directory to process
# @exitcode 0 If the expected output file exists after the run
# @exitcode 1 If the expected output file is missing
py_gtf() {
  debug "Running: ${FUNCNAME[0]}"
  in_file="REMcRdy_lm_only.csv-finalTable.csv"
  # Local instead of set-then-unset: the name never leaks out of the function
  local out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"

  debug "$PYTHON DconJG2.py $in_file $1/"
  "$PYTHON" DconJG2.py "$in_file" "$1/"

  # Use a plain if so that `return` runs in this shell; inside a ( ... )
  # subshell it only ends the subshell and the failure is lost
  if [[ ! -f $out_file ]]; then
    echo "$out_file does not exist"
    return 1
  fi
}
|
||||
|
||||
# @description Not sure what to name this
|
||||
# @arg $1 string directory name
|
||||
_process() {
|
||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||
pushd "$1" || return 1
|
||||
|
||||
shopt -s nullglob
|
||||
set2=(REMcRdy_lm_only/*.txt)
|
||||
shopt -u nullglob
|
||||
submodule pl_gtf
|
||||
# @description Perl module for GTF
|
||||
# @arg $1 string working directory
|
||||
# @arg $2 string output file
|
||||
pl_gtf() {
|
||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||
set1="ORF_List_Without_DAmPs.txt"
|
||||
pushd "$1" || return 1
|
||||
|
||||
for s in "${set2[@]}"; do
|
||||
pl_analyze "$set1" "$s"
|
||||
pl_terms2tsv "$s"
|
||||
"$PERL" terms2tsv_v4.pl "$s.terms" > "$s.tsv"
|
||||
done
|
||||
shopt -s nullglob
|
||||
set2=(REMcRdy_lm_only/*.txt)
|
||||
shopt -u nullglob
|
||||
|
||||
# Concat the process ontology outputs from the REMcRdy_lm_only/ folder
|
||||
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $out_file"
|
||||
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
|
||||
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$out_file"
|
||||
|
||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||
popd || return 1
|
||||
}
|
||||
|
||||
# Perform operations in each directory
|
||||
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
||||
set1="ORF_List_Without_DAmPs.txt"
|
||||
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
|
||||
_process "$d" & # parallelize
|
||||
for s in "${set2[@]}"; do
|
||||
debug "pl_analyze $set1 $s"
|
||||
pl_analyze "$set1" "$s"
|
||||
debug "pl_terms2tsv $s"
|
||||
pl_terms2tsv "$s"
|
||||
done
|
||||
|
||||
# Concat the process ontology outputs from the REMcRdy_lm_only/ folder
|
||||
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
|
||||
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
|
||||
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
|
||||
|
||||
[[ -f $2 ]] || (echo "$2 does not exist"; return 1)
|
||||
popd || return 1
|
||||
}
|
||||
|
||||
|
||||
@@ -550,23 +569,7 @@ pl_terms2tsv() {
|
||||
}
|
||||
|
||||
|
||||
submodule documentation
|
||||
# @section Documentation
|
||||
# @description Generates markdown documentation from this script using shdoc
|
||||
documentation() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
# Print markdown to stdout
|
||||
shdoc < "$SCRIPT"
|
||||
|
||||
# Create markdown file
|
||||
shdoc < "$SCRIPT" > documentation.md
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
submodule r_compile_gtf
|
||||
# @description Compile GTF in R
|
||||
r_compile_gtf() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
@@ -575,6 +578,18 @@ r_compile_gtf() {
|
||||
}
|
||||
|
||||
|
||||
submodule documentation
|
||||
# @section Documentation
# @description Generates markdown documentation from this script using shdoc
# Writes the rendered markdown to documentation.md in the current directory;
# when DEBUG is non-zero the markdown is also printed to stdout first.
documentation() {
  debug "Running: ${FUNCNAME[0]}"

  # Debug mode only: show the rendered markdown on stdout
  if ((DEBUG)); then
    shdoc < "$SCRIPT"
  fi

  # Always (re)create the markdown file
  shdoc < "$SCRIPT" > documentation.md
}
|
||||
|
||||
|
||||
# @description The main loop of script-run-workflow
|
||||
# May eventually need to add git ops
|
||||
# Passes on arguments
|
||||
|
||||
Reference in New Issue
Block a user