#!/usr/bin/env bash
# Copyright 2024 Bryan C. Roessler
#
# This is a flexible yet opinionated analysis workflow for the Hartman Lab
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
#
# Allow indirect functions
# shellcheck disable=SC2317
#
# shdoc info
# @name HartmanLabWorkflow
# @brief One script to rule them all (see: xkcd #927)
# @description Executes the Hartman Lab image analysis workflow
# @option -p<value> | --project=<value> Include one or more projects in the analysis
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
# @option -x<value> | --exclude=<value> Exclude one or more modules from the analysis
# @option -m | --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
# @option -d | --debug Turn on extra debugging output
# @option -h | --help Print help message and exit (overrides other options)

DEBUG=1 # Turn debugging ON by default during development
shopt -s extglob

# @section Libraries
# @description Change these variables to use different libraries
JAVA="${JAVA:-java}"
PYTHON="${PYTHON:-python3}"
PERL="${PERL:-perl}"

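# For example (illustrative only): because of the ${VAR:-default} expansions above, the
# interpreters can be overridden from the environment without editing this script, e.g.
#   PYTHON=python3.11 PERL=/usr/local/bin/perl ./script-run-workflow --help
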
# @section Help
# @description Print a helpful message
print_help() {
    debug "Running: ${FUNCNAME[0]}"

    install_dependencies --get-depends # Loads the dependency arrays

    cat <<-EOF
USAGE:
script-run-workflow [[OPTION] [VALUE]]...

Some options (--project, --include, --exclude) can be passed multiple times or
by using comma-delimited strings (see EXAMPLES below)

OPTIONS:
--project, -p PROJECT
    PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME
--include, -i MODULE
    See MODULES section below for list of available modules
    If no --include is specified, all modules are run
--exclude, -x MODULE
    See MODULES section below for list of modules to exclude
--markdown, -m
    Generate the shdoc markdown file for this program
--yes, -y, --auto
    Always answer yes to questions (non-interactive mode)
--debug, -d
    Print extra debugging info
--help, -h
    Print this help message and exit

MODULES:
${ALL_MODULES[*]}

SUBMODULES:
${ALL_SUBMODULES[*]}

DEPENDENCIES:
deb: ${depends_deb[*]}
rpm: ${depends_rpm[*]}
brew: ${depends_brew[*]}
perl: ${depends_perl[*]}
R: ${depends_r[*]}
BiocManager: ${depends_bioc[*]}

EXAMPLES:
script-run-workflow --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[1]} --include ${ALL_MODULES[2]} --yes
script-run-workflow --include=${ALL_MODULES[0]},${ALL_MODULES[1]}
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --include=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
EOF
}


# @section User Input
# @description Creates arrays and switches from user input
parse_input() {
    debug "Running: ${FUNCNAME[0]}" "$@"

    long_opts="project:,include:,exclude:,markdown,yes,auto,debug,help"
    short_opts="+p:i:x:mydh"

    if input=$(getopt -o "$short_opts" -l "$long_opts" -- "$@"); then
        eval set -- "$input"
        while true; do
            case $1 in
                --project|-p)
                    shift
                    if [[ $1 == *','* ]]; then # split on commas and append
                        IFS=',' read -ra args <<< "$1"
                        PROJECTS+=("${args[@]}")
                    else
                        PROJECTS+=("$1")
                    fi
                    ;;
                --include|-i)
                    shift
                    if [[ $1 == *','* ]]; then # split on commas and append
                        IFS=',' read -ra args <<< "$1"
                        INCLUDE_MODULES+=("${args[@]}")
                    else
                        INCLUDE_MODULES+=("$1")
                    fi
                    ;;
                --exclude|-x)
                    shift
                    if [[ $1 == *','* ]]; then # split on commas and append
                        IFS=',' read -ra args <<< "$1"
                        EXCLUDE_MODULES+=("${args[@]}")
                    else
                        EXCLUDE_MODULES+=("$1")
                    fi
                    ;;
                --markdown|-m)
                    documentation
                    ;;
                --yes|-y|--auto)
                    declare -g YES=1
                    ;;
                --debug|-d)
                    declare -g DEBUG=1
                    ;;
                --help|-h)
                    print_help; exit 0
                    ;;
                --)
                    shift
                    break
                    ;;
            esac
            shift
        done
    else
        err "Incorrect options provided"; exit 1
    fi
}

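# For illustration only: with the append behavior above, "--project A --project B,C"
# leaves PROJECTS=(A B C) after parsing; --include and --exclude accumulate the same way.
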
# @section Helper functions
# @internal
module() {
    debug "Adding $1 module"
    ALL_MODULES+=("$1")
    declare -gA "$1"
}
submodule() {
    debug "Adding $1 submodule"
    ALL_SUBMODULES+=("$1")
    declare -gA "$1"
}
# This function will only work if users have an actual name registered on the server
# TODO for now just use username
# user_initials() {
#     user_record="$(getent passwd "$(whoami)")"
#     user_gecos_field="$(echo "$user_record" | cut -d ':' -f 5)"
#     user_full_name="$(echo "$user_gecos_field" | cut -d ',' -f 1)"
#     last="${user_full_name#* }"
#     echo "${user_full_name:0:1}${last:0:1}"
# }
ask() {
    declare response
    (( YES )) && return 0
    read -r -p "$* [y/N]: " response
    [[ ${response,,} =~ ^(yes|y)$ ]]
}
err() { echo "Error: $*" >&2; }
ask_pn() {
    declare -g PROJECT
    read -r -p "Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): " PROJECT
}
debug() { (( DEBUG )) && echo "Debug: $*"; }


# @section Modules
# @description A module contains a cohesive set of actions/experiments to run on a project
# Use a module when it:
# * Builds a new type of analysis from scratch
# * Generates project directories
# * Combines other modules and submodules
# (see the illustrative example below)
#

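# For illustration only (not part of the workflow): a hypothetical module named
# "my_analysis" would be registered with the module() helper and defined as a function
# of the same name, which main() can then run per project:
#
#   module my_analysis
#   # @description One-line summary of the analysis
#   my_analysis() {
#       debug "Running: ${FUNCNAME[0]}"
#       # create project directories, call submodules, etc.
#   }
#
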
module install_dependencies
# @description Installs dependencies for the workflow
install_dependencies() {
    debug "Running: ${FUNCNAME[0]}" "$@"

    # Dependency arrays
    depends_rpm=(graphviz pandoc pdftk-java gd-devel shdoc)
    depends_deb=(graphviz pandoc pdftk-java libgd-dev shdoc)
    depends_brew=(graphviz pandoc gd pdftk-java shdoc)
    depends_perl=(File::Map ExtUtils::PkgConfig GD GO::TermFinder)
    depends_r=(BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
        plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc rmarkdown)
    depends_bioc=(org.Sc.sgd.db)

    [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars

    # Install system-wide dependencies
    echo "Installing system dependencies"
    case "$(uname -s)" in
        Linux*|CYGWIN*|MINGW*)
            ask "Detected Linux platform, continue?" || return 1
            echo "You may be prompted for your sudo password to install system packages"
            if hash dnf &>/dev/null; then
                sudo dnf install "${depends_rpm[@]}"
            elif hash apt &>/dev/null; then
                sudo apt install "${depends_deb[@]}"
            fi
            ;;
        Darwin*)
            ask "Detected Mac platform, continue?" || return 1
            export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
            curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh | bash
            brew install "${depends_brew[@]}"
            ;;
        *)
            echo "Your system could not be detected, please install dependencies manually"
            ;;
    esac

    # Install perl CPAN modules
    echo "Installing perl CPAN modules"
    debug "cpan" "${depends_perl[@]}"
    cpan "${depends_perl[@]}"

    # Install R packages
    echo "Installing R packages"

    # Build a comma-separated, quoted package list for install.packages()
    depends_r_str=""
    depends_r_to_string() {
        for d in "${depends_r[@]}"; do
            depends_r_str+="$d\", \""
        done
        depends_r_str="${depends_r_str::-3}" # strip the trailing , " (comma, space, quote)
    }
    depends_r_to_string

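    # For illustration only: with a hypothetical depends_r=(ggplot2 plyr), the string built
    # above makes the call below expand to
    #   Rscript -e 'install.packages(c("ggplot2", "plyr"), dep=TRUE, repos="https://cloud.r-project.org")'
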
debug "Rscript -e install.packages(c(\"$depends_r_str), dep=TRUE, repos=\"https://cloud.r-project.org\")"
|
||
Rscript -e "install.packages(c(\"$depends_r_str), dep=TRUE, repos=\"https://cloud.r-project.org\")"
|
||
Rscript -e "BiocManager::install(\"${depends_bioc[0]}\")"
|
||
}
|
||
|
||
|
||
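# For example (illustrative): to run only this module for a project, the workflow could be
# invoked as
#   script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include install_dependencies
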
module init_job
# @section Initialize a new job in the scans directory
# @description Create a new ExpJobs project
# TODO Copy over source image directories from robot - are these also named by the ExpJobs name?
init_job() {
    debug "Running: ${FUNCNAME[0]}"

    if [[ -d $SCAN_DIR ]]; then
        ask "$SCAN_DIR already exists, re-initialize?" || return 0
    else
        ask "Initialize a new project at $SCAN_DIR?" || return 1
        mkdir -p "$SCAN_DIR"
    fi

    [[ -d $SCAN_DIR/MasterPlateFiles ]] || mkdir -p "$SCAN_DIR/MasterPlateFiles"

    DRUG_MEDIA_FILE="$SCAN_DIR/MasterPlateFiles/DrugMedia_$PROJECT.xls"
    MASTER_PLATE_FILE="$SCAN_DIR/MasterPlateFiles/MasterPlate_$PROJECT.xls"

    # TODO Where are the actual templates?
    for f in "$DRUG_MEDIA_FILE" "$MASTER_PLATE_FILE"; do
        touch "$f"
    done
}


module easy
# @section EASY
# @description Start an EASY analysis
# The QHTCPImageFolders and ‘MasterPlateFiles’ folder are the inputs for image analysis with EASY software.
# EASY will automatically generate a ‘Results’ directory (within the ExpJobs/‘ExperimentJob’ folder) with a timestamp and an optional short description provided by the user (Fig.2).
# The ‘Results’ directory is created and entered using the “File >> New Experiment” dropdown in EASY.
# Multiple ‘Results’ files may be created (and uniquely named) within an ‘ExperimentJob’ folder.
easy() {
    debug "Running: ${FUNCNAME[0]}"
    EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240430AppExported/EstartConsole.m"

    pushd "$SCAN_DIR" || return 1

    # Launch graphical MATLAB if the user wants
    ask "Start EASY in MATLAB? This requires a GUI." && matlab -nosplash -r "$EASY"

    # Glob EASY output and make sure it exists
    shopt -s nullglob
    EASY_RESULTS_DIRS=( Results* )
    shopt -u nullglob
    [[ ${#EASY_RESULTS_DIRS[@]} -ge 1 ]] || { err "Missing EASY output"; popd; return 1; }

    declare -a EASY_OUT_ARRAY
    for EASY_RESULTS_DIR in "${EASY_RESULTS_DIRS[@]}"; do
        [[ -d $EASY_RESULTS_DIR ]] && echo "Found EASY Results directory: $EASY_RESULTS_DIR"
        EASY_PRINT_RESULTS_DIR="$EASY_RESULTS_DIR/PrintResults"
        [[ -d $EASY_PRINT_RESULTS_DIR ]] && echo "Found EASY PrintResults directory: $EASY_PRINT_RESULTS_DIR"
        EASY_PRINT_RESULTS_FILES=(
            "$EASY_PRINT_RESULTS_DIR/!!"*
            "$EASY_PRINT_RESULTS_DIR"/NoGrowth_*.txt
            "$EASY_PRINT_RESULTS_DIR"/GrowthOnly_*.txt
        )
        EASY_OUT_ARRAY+=("$EASY_RESULTS_DIR" "$EASY_PRINT_RESULTS_DIR" "${EASY_PRINT_RESULTS_FILES[@]}")
    done

    echo "EASY OUTPUT ARRAY: " "${EASY_OUT_ARRAY[@]}"

    popd || return 1
}


module ezview
# @section EZView
ezview() {
    debug "Running: ${FUNCNAME[0]}"
    EZVIEW_DIR="/mnt/data/EZVIEW"
    echo "$EZVIEW_DIR"
}


module qhtcp
# @section QHTCP
# @description Main QHTCP module (functional rewrite of REMcMaster3.sh)
qhtcp() {
    debug "Running: ${FUNCNAME[0]}"
    TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
    QHTCP_DIR="/mnt/data/StudiesQHTCP/$PROJECT"

    # The list of submodules (functions) to run for this module,
    # in the appropriate order of operations
    submodules=(
        r_join_interact
        java_extract
        r_add_shift_values
        r_heat_maps_zscores
        r_heat_maps_homology
        gtf
        r_compile_gtf
    )

    while [[ -d $QHTCP_DIR ]]; do
        echo "A project already exists at $QHTCP_DIR"
        ask "Safely update $QHTCP_DIR from the $TEMPLATE_DIR template?" && break
        if ask "Back up $QHTCP_DIR to $QHTCP_DIR.bk and start fresh?"; then
            mv "$QHTCP_DIR" "$QHTCP_DIR.bk" || { err "Backup unsuccessful, exiting"; exit 1; }
        fi
    done

    # Copy template to QHTCP project directory
    if rsync --archive --update "$TEMPLATE_DIR"/ "$QHTCP_DIR"; then
        echo "New project created at $QHTCP_DIR"
    fi

    # Create StudyInfo.csv
    # Right now this is identical to the template but we can change it later
    cat <<-EOF > "$QHTCP_DIR/Code/StudyInfo.csv"
ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
1,ExpName1,NA,NA,UserInitials
2,ExpName2,NA,NA,UserInitials
3,ExpName3,NA,NA,UserInitials
4,ExpName4,NA,NA,UserInitials
EOF

    # Enter REMc directory to run the scripts there
    pushd "$QHTCP_DIR/REMc" || return 1

    # Run each submodule
    for s in "${submodules[@]}"; do "$s"; done

    popd || return 1
}


module gtf
# @section GTF
# @description GTF module for QHTCP
gtf() {
    debug "Running: ${FUNCNAME[0]}"
    process_dir="GTF/Process"
    function_dir="GTF/Function"
    component_dir="GTF/Component"
    out_dir="REMcRdy_lm_only"

    py_gtf_dcon "$process_dir" "$out_dir"

    # Run the GO TermFinder analysis for each directory in parallel
    for d in "$process_dir" "$function_dir" "$component_dir"; do
        rsync -a "$process_dir/$out_dir" "$d"/
        pl_gtf "$d" "$out_dir" & # parallelize across directories
    done
    wait # all pl_gtf jobs must finish before their results are concatenated

    # Concatenate the results for each directory
    for d in "$process_dir" "$function_dir" "$component_dir"; do
        out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
        py_gtf_concat "$d" "$out_dir" "$out_file"
    done
}


# @section Submodules
# @description Submodules provide functionality to modules and are reusable between modules
# Use a submodule for:
# * Calling external scripts
# * Performing repetitive tasks
# (see the illustrative example below)
#

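# For illustration only: a hypothetical submodule "my_step" would be registered and defined as
#
#   submodule my_step
#   my_step() { debug "Running: ${FUNCNAME[0]}"; Rscript MyStep.R; }
#
# and then listed in a module's submodule array (see qhtcp above) so it runs in order.
#
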
submodule r_join_interact
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
r_join_interact() {
    debug "Running: ${FUNCNAME[0]}"
    debug "Rscript JoinInteractExps3dev.R"
    Rscript JoinInteractExps3dev.R
    out_file="REMcRdy_lm_only.csv"
    out_file2="Shift_only.csv"
    for f in "$out_file" "$out_file2"; do
        [[ -f $f ]] || { err "$f does not exist"; return 1; }
    done
}


submodule java_extract
# @description Jingyu's REMc java utility using input file REMcRdy_lm_only.csv
# and output REMcRdy_lm_only.csv-finalTable.csv
java_extract() {
    debug "Running: ${FUNCNAME[0]}"
    classpath="jingyuJava_1_7_extractLib.jar"
    out_file="REMcRdy_lm_only.csv-finalTable.csv"

    # Back up REMcRdy_lm_only.csv-finalTable.csv
    [[ -f $out_file ]] && mv "$out_file" "$out_file.bk"

    java_cmd=(
        "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain
        "REMcRdy_lm_only.csv"
        "GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"
        "ORF_List_Without_DAmPs.txt" 1 true true
    )

    debug "${java_cmd[@]}"
    "${java_cmd[@]}"
    [[ -f $out_file ]] || { err "$out_file does not exist"; return 1; }
}


submodule r_add_shift_values
# @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv
# and output REMcWithShift.csv for use with the REMc heat maps
r_add_shift_values() {
    debug "Running: ${FUNCNAME[0]}"
    out_file="REMcHeatmaps/REMcWithShift.csv"
    debug "Rscript AddShiftVals2.R"
    Rscript AddShiftVals2.R
    rm -f "REMcHeatmaps/"*.pdf
    [[ -f $out_file ]] || { err "$out_file does not exist"; return 1; }
}


submodule r_heat_maps_zscores
# @description Execute REMcHeatmaps_zscores.R
r_heat_maps_zscores() {
    debug "Running: ${FUNCNAME[0]}"
    out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"
    debug "Rscript REMcHeatmaps_zscores.R"
    Rscript REMcHeatmaps_zscores.R
    pdfs=(REMcHeatmaps/*.pdf)
    debug "pdftk ${pdfs[*]} output $out_file"
    pdftk "${pdfs[@]}" output "$out_file"
    [[ -f $out_file ]] || { err "$out_file does not exist"; return 1; }
}


submodule r_heat_maps_homology
# @description Execute REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R
r_heat_maps_homology() {
    debug "Running: ${FUNCNAME[0]}"
    work_dir="REMcHeatmapsWithHomology"
    source_file="REMcHeatmaps/REMcWithShift.csv"
    target_file="$work_dir/REMcWithShift.csv"
    out_file="$work_dir/Homology/compiledREMcHomologyHeatmaps.pdf"
    debug "rsync --archive $source_file $target_file"
    rsync --archive "$source_file" "$target_file"

    # Clean old output
    rm -f "$work_dir/Homology/"*.{pdf,csv}

    pushd "$work_dir" || return 1
    Rscript \
        REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R \
        REMcWithShift.csv \
        Homology \
        17_0503_DAmPs_Only.txt \
        Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv
    popd || return 1

    pdfs=("$work_dir"/Homology/*.pdf)
    pdftk "${pdfs[@]}" output "$out_file"

    [[ -f $out_file ]] || { err "$out_file does not exist"; return 1; }
}


submodule py_gtf_dcon
# @description Perform python dcon portion of GTF
# @arg $1 string Directory to process
# @arg $2 string Output directory name
py_gtf_dcon() {
    debug "Running: ${FUNCNAME[0]}"
    in_file="REMcRdy_lm_only.csv-finalTable.csv"
    out_file="$1/$2/1-0-0-finaltable.csv"
    debug "$PYTHON DconJG2.py $in_file $1/"
    "$PYTHON" DconJG2.py "$in_file" "$1/"
    [[ -f $out_file ]] || { err "$out_file does not exist"; return 1; }
}


submodule pl_gtf
# @description Perl modules for GTF
# @arg $1 string working directory
# @arg $2 string output directory name to look for txt files
pl_gtf() {
    debug "Running: ${FUNCNAME[0]}" "$@"

    pushd "$1" || return 1
    set1="ORF_List_Without_DAmPs.txt"
    shopt -s nullglob
    set2=("$2"/*.txt) # glob them all
    shopt -u nullglob

    for s2 in "${set2[@]}"; do
        debug "pl_gtf_analyze $set1 $s2"
        pl_gtf_analyze "$set1" "$s2"
        debug "pl_gtf_terms2tsv $s2"
        pl_gtf_terms2tsv "$s2"
    done

    popd || return 1
}


submodule pl_gtf_analyze
# @description Perl analyze submodule
# This seems weird to me because we're just overwriting the same data for all set2 members
# https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl
# Is there a reason you need a custom version and not the original from cpan?
# @arg $1 string Set 1
# @arg $2 string Set 2
pl_gtf_analyze() {
    debug "Running: ${FUNCNAME[0]}"
    script="analyze_v2.pl"
    an="gene_association.sgd"
    out_file="gene_ontology_edit.obo"
    debug "$PERL $script -an $an -as P -o $out_file -b $1 $2"
    "$PERL" "$script" -an "$an" -as P -o "$out_file" -b "$1" "$2"
}


submodule pl_gtf_terms2tsv
# @description Perl terms2tsv submodule
# Probably should be translated to shell/python
# @arg $1 string Set 2
pl_gtf_terms2tsv() {
    debug "Running: ${FUNCNAME[0]}"
    script="terms2tsv_v4.pl"
    debug "$PERL $script $1.terms > $1.tsv"
    "$PERL" "$script" "$1.terms" > "$1.tsv"
}


submodule py_gtf_concat
# @description Python concat submodule for GTF
# Concatenates the process ontology outputs from the REMcRdy_lm_only folder
# Probably should be translated to bash
# @arg $1 string working directory
# @arg $2 string output directory name to look for txt files
# @arg $3 string output file
py_gtf_concat() {
    debug "Running: ${FUNCNAME[0]}"
    pushd "$1" || return 1
    script="Concatenate_GTF_results.py"
    debug "$PYTHON $script $2/ $3"
    "$PYTHON" "$script" "$2/" "$3"
    declare status=0
    [[ -f $3 ]] || { err "$3 does not exist"; status=1; }
    popd || return 1
    return "$status"
}


submodule r_compile_gtf
# @description Compile GTF in R
r_compile_gtf() {
    debug "Running: ${FUNCNAME[0]}"
    debug "Rscript CompileGTF.R"
    Rscript CompileGTF.R
}


submodule documentation
# @section Documentation
# @description Generates markdown documentation from this script using shdoc
documentation() {
    debug "Running: ${FUNCNAME[0]}"
    # Print markdown to stdout
    ((DEBUG)) && shdoc < "$SCRIPT"
    # Create markdown file
    shdoc < "$SCRIPT" > documentation.md
}


# @description The main loop of script-run-workflow
# May eventually need to add git ops
# Passes on arguments
# Most variables in main() are user configurable or can be overridden by env
main() {
    debug "Running: ${FUNCNAME[0]}" "$@"

    # Where are we located?
    SCRIPT=$(realpath -s "${BASH_SOURCE[0]}")
    SCRIPT_DIR=$(dirname "$SCRIPT")

    # Set the automatic project directory prefix
    PROJECT_PREFIX="$(whoami)_$(date +%y_%m_%d)"
    san() { [[ $1 =~ .+_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_.+ ]]; } # sanitizer regex for the prefix pattern

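    # For illustration only: a hypothetical user "jdoe" running on 2024-06-15 would get the
    # prefix jdoe_24_06_15, so a project name accepted by san() is jdoe_24_06_15_MY_PROJECT
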
    declare -ag PROJECTS=() # this array will hold all of the projects for this run

    parse_input "$@" # parse arguments with getopt

    # Prompt user for the PROJECT if we still don't have one
    if [[ ${#PROJECTS[@]} -eq 0 ]]; then # still allows for environment overrides
        ask_pn
        PROJECTS+=("$PROJECT")
    fi

    for i in "${!PROJECTS[@]}"; do
        if ! san "${PROJECTS[i]}"; then
            echo "Project name ${PROJECTS[i]} is invalid"
            echo "Enter a replacement"
            ask_pn
            san "$PROJECT" || { err "RTFM"; return 1; }
            PROJECTS[i]="$PROJECT"
        fi
    done

    SCANS_DIR="${SCANS_DIR:-"/mnt/data/ExpJobs"}" # TODO propose changing this to something else

    # If we don't catch with getopt or env, run all
    if [[ ${#INCLUDE_MODULES[@]} -eq 0 ]]; then
        MODULES=("${ALL_MODULES[@]}")
    else
        MODULES=("${INCLUDE_MODULES[@]}")
    fi

    # Exclude modules from --exclude
    arr=()
    for m in "${MODULES[@]}"; do
        [[ " ${EXCLUDE_MODULES[*]} " =~ [[:space:]]${m}[[:space:]] ]] || arr+=("$m")
    done
    MODULES=("${arr[@]}")
    unset arr

    # Sanitize MODULES
    for i in "${!MODULES[@]}"; do
        if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
            echo "Module ${MODULES[i]} not in the module list"
            echo "Available modules: ${ALL_MODULES[*]}"
            read -r -p "Enter replacement module name: " MODULE
            [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || { err "RTFM"; return 1; }
            MODULES[i]="$MODULE"
        fi
    done

    # Loop over projects
    for PROJECT in "${PROJECTS[@]}"; do
        SCAN_DIR="$SCANS_DIR/$PROJECT"

        # Run selected modules
        for m in "${MODULES[@]}"; do
            ask "Run $m" && "$m"
        done
    done
}

main "$@"

exit $?