More progress on initial commit
This commit is contained in:
@@ -1,7 +1,9 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# Copyright 2024 Bryan C. Roessler
|
# Copyright 2024 Bryan C. Roessler
|
||||||
# This is currently a code scratchpad for organizing the Hartman Lab Server workflow
|
#
|
||||||
|
# This is a code scratchpad for organizing the Hartman Lab Server workflow
|
||||||
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
|
# It contains a mixture of code/pseudocode and shouldn't be run until this message is removed
|
||||||
|
#
|
||||||
# Allow indirect functions
|
# Allow indirect functions
|
||||||
# shellcheck disable=SC2317
|
# shellcheck disable=SC2317
|
||||||
#
|
#
|
||||||
@@ -9,8 +11,14 @@
|
|||||||
# @name HartmanLabWorkflow
|
# @name HartmanLabWorkflow
|
||||||
# @brief One script to rule them all (see: xkcd #927)
|
# @brief One script to rule them all (see: xkcd #927)
|
||||||
# @description Executes the Hartman Lab image analysis workflow
|
# @description Executes the Hartman Lab image analysis workflow
|
||||||
# @arg $1 string A project name
|
# @option -p<value> | --project=<value> Include one or more projects in the analysis
|
||||||
#
|
# @option -i<value> | --include=<value> Include one or more modules in the analysis (default: all modules)
|
||||||
|
# @option -x<value> | --exclude=<value> Exclude one or more modules in the analysis
|
||||||
|
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
||||||
|
# @option -d | --debug Turn on extra debugging output
|
||||||
|
# @option -h | --help Print help message and exit (overrides other options)
|
||||||
|
|
||||||
|
DEBUG=1 # Turn debugging ON by default during development
|
||||||
shopt -s extglob
|
shopt -s extglob
|
||||||
|
|
||||||
# @section Libraries
|
# @section Libraries
|
||||||
@@ -22,42 +30,53 @@ PERL="${PERL:-perl}"
|
|||||||
# @section Help
|
# @section Help
|
||||||
# @description Print a helpful message
|
# @description Print a helpful message
|
||||||
# @description Print the usage/help text to stdout.
# Reads the globals PROJECT_PREFIX, ALL_MODULES and ALL_SUBMODULES, plus the
# depends_* arrays that `install_dependencies --get-depends` populates.
print_help() {
  debug "Running: ${FUNCNAME[0]}"

  install_dependencies --get-depends # Loads the dependency arrays

  # The heredoc body and its terminator sit at column 0 and use a plain
  # <<EOF, so the text does not depend on tab indentation surviving edits.
  # Every example uses --include/--exclude: getopt only knows the long
  # options project, include, exclude, yes, auto, debug and help, so the
  # previously advertised --module would be rejected.
  cat <<EOF
USAGE:
  script-run-workflow [[OPTION] [VALUE]]...

  Some options (--project, --include, --exclude) can be passed multiple times or
  by using comma delimited strings (see EXAMPLES below)

OPTIONS:
  --project, -p PROJECT
    PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT_NAME
  --include, -i MODULE
    See MODULES section below for list of available modules
    If no --include is specified, all modules are run
  --exclude, -x MODULE
    See MODULES section below for list of modules to exclude
  --yes, -y, --auto
    Always answer yes to questions (non-interactive mode)
  --debug, -d
    Print extra debugging info
  --help, -h
    Print this help message and exit

MODULES:
  ${ALL_MODULES[*]}

SUBMODULES:
  ${ALL_SUBMODULES[*]}

DEPENDENCIES:
  deb: ${depends_deb[*]}
  rpm: ${depends_rpm[*]}
  brew: ${depends_brew[*]}
  perl: ${depends_perl[*]}
  R: ${depends_r[*]}
  BiocManager: ${depends_bioc[*]}

EXAMPLES:
  script-run-workflow --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
  script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[0]} --include ${ALL_MODULES[1]}
  script-run-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --include ${ALL_MODULES[1]} --include ${ALL_MODULES[2]} --yes
  script-run-workflow --include=${ALL_MODULES[0]},${ALL_MODULES[1]}
  script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
  script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --include=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
EOF
}
|
||||||
|
|
||||||
@@ -65,10 +84,9 @@ print_help() {
|
|||||||
# @section User Input
|
# @section User Input
|
||||||
# @description Creates array and switches from user input
|
# @description Creates array and switches from user input
|
||||||
parse_input() {
|
parse_input() {
|
||||||
echo "Running: ${FUNCNAME[0]}" "$@"
|
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||||
|
|
||||||
long_opts="project:,include:,exclude:,yes,auto,debug,help"
|
long_opts="project:,include:,exclude:,yes,auto,debug,help"
|
||||||
#long_opts+="restorefile:,betapass:,"
|
|
||||||
short_opts="+p:i:x:yhd"
|
short_opts="+p:i:x:yhd"
|
||||||
|
|
||||||
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
|
if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
|
||||||
@@ -77,15 +95,27 @@ parse_input() {
|
|||||||
case $1 in
|
case $1 in
|
||||||
--project|-p)
|
--project|-p)
|
||||||
shift
|
shift
|
||||||
declare -ga PROJECT_NAMES+=("$1")
|
if [[ $1 == *','* ]] ; then # check for commas
|
||||||
|
IFS=',' read -ra PROJECTS <<< "$1"
|
||||||
|
else
|
||||||
|
PROJECTS+=("$1")
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
--include|-i)
|
--include|-i)
|
||||||
shift
|
shift
|
||||||
declare -ga MODULES+=("$1")
|
if [[ $1 == *','* ]] ; then # check for commas
|
||||||
|
IFS=',' read -ra INCLUDE_MODULES <<< "$1"
|
||||||
|
else
|
||||||
|
INCLUDE_MODULES+=("$1")
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
--exclude|-x)
|
--exclude|-x)
|
||||||
shift
|
shift
|
||||||
declare -ga EXCLUDE_MODULES+=("$1")
|
if [[ $1 == *','* ]] ; then # check for commas
|
||||||
|
IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
|
||||||
|
else
|
||||||
|
EXCLUDE_MODULES+=("$1")
|
||||||
|
fi
|
||||||
;;
|
;;
|
||||||
--yes|-y|--auto)
|
--yes|-y|--auto)
|
||||||
declare -g YES=1
|
declare -g YES=1
|
||||||
@@ -109,12 +139,118 @@ parse_input() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# @section Helper functions
|
||||||
|
# @internal
|
||||||
|
# @description Register a module: append its name to ALL_MODULES and declare a
# global associative array of the same name.
# @arg $1 string Module name; must be a valid shell identifier
# @exitcode 0 The module is registered (including when it already was)
# @exitcode 1 The name is missing or not a valid identifier
module() {
  local registered
  # Validate first so a bad name is never half-registered: previously the
  # name was appended to ALL_MODULES before `declare -gA` could reject it.
  if [[ ! ${1-} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    echo "Error: invalid module name '${1-}'" >&2
    return 1
  fi
  debug "Adding $1 module"
  # Registering the same name twice must not list it twice
  for registered in "${ALL_MODULES[@]}"; do
    if [[ $registered == "$1" ]]; then
      return 0
    fi
  done
  ALL_MODULES+=("$1")
  declare -gA "$1"
}
|
||||||
|
# @description Register a submodule: append its name to ALL_SUBMODULES and
# declare a global associative array of the same name.
# @arg $1 string Submodule name; must be a valid shell identifier
# @exitcode 0 The submodule is registered (including when it already was)
# @exitcode 1 The name is missing or not a valid identifier
submodule() {
  local registered
  # Validate first so a bad name is never half-registered: previously the
  # name was appended to ALL_SUBMODULES before `declare -gA` could reject it.
  if [[ ! ${1-} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    echo "Error: invalid submodule name '${1-}'" >&2
    return 1
  fi
  debug "Adding $1 submodule"
  # Registering the same name twice must not list it twice
  for registered in "${ALL_SUBMODULES[@]}"; do
    if [[ $registered == "$1" ]]; then
      return 0
    fi
  done
  ALL_SUBMODULES+=("$1")
  declare -gA "$1"
}
|
||||||
|
# This function will only work if users have an actual name registered on the server
|
||||||
|
# TODO for now just use username
|
||||||
|
# user_initials() {
|
||||||
|
# user_record="$(getent passwd "$(whoami)")"
|
||||||
|
# user_gecos_field="$(echo "$user_record" | cut -d ':' -f 5)"
|
||||||
|
# user_full_name="$(echo "$user_gecos_field" | cut -d ',' -f 1)"
|
||||||
|
# last="${user_full_name#* }"
|
||||||
|
# echo "${user_full_name:0:1}${last:0:1}"
|
||||||
|
# }
|
||||||
|
# @description Ask a yes/no question.
# Returns 0 immediately, without prompting, when YES is non-zero. Otherwise
# prompts with "<arguments> [y/N]: " and returns 0 only when the reply,
# lowercased, is exactly "y" or "yes"; any other reply returns non-zero.
# @arg $@ string The question text shown in the prompt
ask() {
|
||||||
|
declare response
|
||||||
|
(( YES )) && return 0
|
||||||
|
read -r -p "$* [y/N]: " response
|
||||||
|
[[ ${response,,} =~ ^(yes|y)$ ]]
|
||||||
|
}
|
||||||
|
# Report a problem: write the arguments, prefixed with "Error: ", to stderr.
err() {
  printf '%s\n' "Error: $*" >&2
}
|
||||||
|
# @description Prompt for a full project name and store the reply in the
# global variable PROJECT. The prompt shows ${PROJECT_PREFIX}_PROJECT_NAME as
# an example of the expected form; the reply itself is not validated here.
ask_pn() {
|
||||||
|
declare -g PROJECT
|
||||||
|
read -r -p "Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): " PROJECT
|
||||||
|
}
|
||||||
|
# @description Print a "Debug: "-prefixed message to stdout when DEBUG is non-zero.
# @arg $@ string Message words (joined with single spaces)
# @exitcode 0 Also when debugging is disabled or DEBUG is unset
debug() {
  # An `if` rather than `(( DEBUG )) && echo ...`: the short-circuit form
  # returned non-zero whenever debugging was off, which leaks a failure status
  # to callers (and to `set -e`) for what is only a suppressed log line.
  if (( DEBUG )); then
    echo "Debug: $*"
  fi
}
|
||||||
|
|
||||||
|
|
||||||
|
# @section Modules
|
||||||
|
# @description A module contains a cohesive set of actions/experiments to run on a project
|
||||||
|
# Use a module when:
|
||||||
|
# * Building a new type of analysis
|
||||||
|
# * Combining submodules
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
module install_dependencies
|
||||||
|
# @description Installs dependencies for the workflow
|
||||||
|
install_dependencies() {
  # @arg $1 string Optional "--get-depends": only populate the depends_*
  #   arrays and return 0 without installing anything
  # @exitcode 1 The user declined the platform confirmation prompt
  #
  # The depends_* arrays are deliberately global: print_help reads them after
  # calling this function with --get-depends.
  local r_vector bioc_vector

  debug "Running: ${FUNCNAME[0]}" "$@"

  # Dependency arrays
  depends_rpm=(graphviz pandoc pdftk-java gd-devel)
  depends_deb=(graphviz pandoc pdftk-java libgd-dev)
  depends_brew=(graphviz pandoc gd pdftk-java) # was misspelled "graphiz"
  depends_perl=(File::Map ExtUtils::PkgConfig GD GO::TermFinder)
  depends_r=(BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
    plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc rmarkdown)
  depends_bioc=(org.Sc.sgd.db)

  [[ ${1-} == "--get-depends" ]] && return 0 # if we just want to read the depends vars

  # Install system-wide dependencies
  echo "Installing system dependencies"
  case "$(uname -s)" in
    Linux*|CYGWIN*|MINGW*)
      ask "Detected Linux platform, continue?" || return 1
      echo "You may be prompted for your sudo password to install system packages"
      if hash dnf &>/dev/null; then
        sudo dnf install "${depends_rpm[@]}"
      elif hash apt &>/dev/null; then
        sudo apt install "${depends_deb[@]}"
      else
        # Previously this case fell through without telling the user anything
        echo "No supported package manager (dnf, apt) found, please install dependencies manually"
      fi
      ;;
    Darwin*)
      ask "Detected Mac platform, continue?" || return 1
      export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
      # Only bootstrap Homebrew when it is missing; no need to download and
      # run the remote installer on every invocation
      if ! hash brew &>/dev/null; then
        curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh | bash
      fi
      brew install "${depends_brew[@]}"
      ;;
    *)
      echo "Your system could not be detected, please install dependencies manually"
      ;;
  esac

  # Install perl CPAN modules
  echo "Installing perl CPAN modules"
  debug "cpan" "${depends_perl[@]}"
  cpan "${depends_perl[@]}"

  # Install R packages
  echo "Installing R packages"

  # Render each array as an R character vector: c("a", "b", ...).
  # printf repeats the format per element; the trailing ", " is then trimmed.
  printf -v r_vector '"%s", ' "${depends_r[@]}"
  r_vector="c(${r_vector%, })"
  printf -v bioc_vector '"%s", ' "${depends_bioc[@]}"
  bioc_vector="c(${bioc_vector%, })"

  debug "Rscript -e install.packages($r_vector, dep=TRUE, repos=\"https://cloud.r-project.org\")"
  Rscript -e "install.packages($r_vector, dep=TRUE, repos=\"https://cloud.r-project.org\")"
  # Install every Bioconductor package listed, not just the first element
  Rscript -e "BiocManager::install($bioc_vector)"
}
|
||||||
|
|
||||||
|
|
||||||
|
module init_job
|
||||||
# @section Initialize a new job in the scans directory
|
# @section Initialize a new job in the scans directory
|
||||||
# @description Create a new ExpJobs project
|
# @description Create a new ExpJobs project
|
||||||
# TODO Copy over source image directories from robot - are these also named by the ExpJobs name?
|
# TODO Copy over source image directories from robot - are these also named by the ExpJobs name?
|
||||||
init_job() {
|
init_job() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
|
||||||
if [[ -d $SCAN_DIR ]]; then
|
if [[ -d $SCAN_DIR ]]; then
|
||||||
ask "$SCAN_DIR already exists, re-initialize?" || return 0
|
ask "$SCAN_DIR already exists, re-initialize?" || return 0
|
||||||
else
|
else
|
||||||
@@ -124,17 +260,17 @@ init_job() {
|
|||||||
|
|
||||||
[[ -d $SCAN_DIR/MasterPlateFiles ]] || mkdir -p "$SCAN_DIR/MasterPlateFiles"
|
[[ -d $SCAN_DIR/MasterPlateFiles ]] || mkdir -p "$SCAN_DIR/MasterPlateFiles"
|
||||||
|
|
||||||
DRUG_MEDIA_FILE="$SCAN_DIR/MasterPlateFiles/DrugMedia_$PROJECT_NAME.xls"
|
DRUG_MEDIA_FILE="$SCAN_DIR/MasterPlateFiles/DrugMedia_$PROJECT.xls"
|
||||||
MASTER_PLATE_FILE="$SCAN_DIR/MasterPlateFiles/MasterPlate_$PROJECT_NAME.xls"
|
MASTER_PLATE_FILE="$SCAN_DIR/MasterPlateFiles/MasterPlate_$PROJECT.xls"
|
||||||
|
|
||||||
# TODO Where are the actual templates?
|
# TODO Where are the actual templates?
|
||||||
for f in $DRUG_MEDIA_FILE $MASTER_PLATE_FILE; do
|
for f in $DRUG_MEDIA_FILE $MASTER_PLATE_FILE; do
|
||||||
touch "$f"
|
touch "$f"
|
||||||
done
|
done
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
module easy
|
||||||
# @section EASY
|
# @section EASY
|
||||||
# @description Start an EASY analysis
|
# @description Start an EASY analysis
|
||||||
# The QHTCPImageFolders and ‘MasterPlateFiles’ folder are the inputs for image analysis with EASY software.
|
# The QHTCPImageFolders and ‘MasterPlateFiles’ folder are the inputs for image analysis with EASY software.
|
||||||
@@ -142,7 +278,7 @@ init_job() {
|
|||||||
# The ‘Results’ directory is created and entered, using the “File >> New Experiment” dropdown in EASY.
|
# The ‘Results’ directory is created and entered, using the “File >> New Experiment” dropdown in EASY.
|
||||||
# Multiple ‘Results’ files may be created (and uniquely named) within an ‘ExperimentJob’ folder.
|
# Multiple ‘Results’ files may be created (and uniquely named) within an ‘ExperimentJob’ folder.
|
||||||
easy() {
|
easy() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240430AppExported/EstartConsole.m"
|
EASY="/mnt/data/EASY/EasyDev2024/BU/EASY240430AppExported/EstartConsole.m"
|
||||||
|
|
||||||
pushd "$SCAN_DIR" || return 1
|
pushd "$SCAN_DIR" || return 1
|
||||||
@@ -172,22 +308,35 @@ easy() {
|
|||||||
echo "EASY OUTPUT ARRAY: " "${EASY_OUT_ARRAY[@]}"
|
echo "EASY OUTPUT ARRAY: " "${EASY_OUT_ARRAY[@]}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
module ezview
|
||||||
# @section EZView
|
# @section EZView
|
||||||
# @description Sets the global EZVIEW_DIR to /mnt/data/EZVIEW and prints it.
# Each line below appears twice: this is a side-by-side diff, and the only
# difference between the two sides is that the entry log line moves from
# `echo` to `debug`.
ezview() {
|
ezview() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
EZVIEW_DIR="/mnt/data/EZVIEW"
|
EZVIEW_DIR="/mnt/data/EZVIEW"
|
||||||
echo "$EZVIEW_DIR"
|
echo "$EZVIEW_DIR"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# @section StudiesQHTCP
|
module qhtcp
|
||||||
# @description This section is derived from the earliest work of Jinyu Guo. As such it uses Perl scripts. Without porting these two Perl scripts into a new integrated R script or Python script, one is constrained to use the rather crude copy-paste and shell script inherent in the original procedures. These two Perl scripts are analyze_v2.pl and terms2tsv_v4.pl which were written in 2003 by Gavin Sherlock for the SGD gene ontology system and require perl installations of such files. These also require that the gene_ontology_edit.obo, SGD_features.tab files used in the ../Code also be included here. Without rewriting the code, one must compromise directory convenience.
|
# @section QHTCP
|
||||||
|
# @description Main QHTCP module (functional rewrite of REMcMaster3.sh)
|
||||||
# @description Main loop for qhtcp modules (rewrite of REMcMaster3.sh)
|
|
||||||
qhtcp() {
|
qhtcp() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
|
TEMPLATE_DIR="$SCRIPT_DIR/templates/qhtcp"
|
||||||
QHTCP_DIR="/mnt/data/StudiesQHTCP/$PROJECT_NAME"
|
QHTCP_DIR="/mnt/data/StudiesQHTCP/$PROJECT"
|
||||||
|
|
||||||
|
# Our list of submodules (functions) to run for this module
|
||||||
|
# Put these in the appropriate order of operations
|
||||||
|
submodules=(
|
||||||
|
r_join_interact
|
||||||
|
java_extract
|
||||||
|
r_add_shift_values
|
||||||
|
r_heat_maps_zscores
|
||||||
|
r_heat_maps_homology
|
||||||
|
py_gtf
|
||||||
|
r_compile_gtf
|
||||||
|
)
|
||||||
|
|
||||||
while [[ -d $QHTCP_DIR ]]; do
|
while [[ -d $QHTCP_DIR ]]; do
|
||||||
echo "A project already exists at $QHTCP_DIR"
|
echo "A project already exists at $QHTCP_DIR"
|
||||||
@@ -202,24 +351,38 @@ qhtcp() {
|
|||||||
echo "New project created at $QHTCP_DIR"
|
echo "New project created at $QHTCP_DIR"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Create StudyInfo.csv
|
||||||
|
# Right now this is identical to the template but we can change it later
|
||||||
|
cat <<-EOF > "$QHTCP_DIR/Code/StudyInfo.csv"
|
||||||
|
ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
|
||||||
|
1,ExpName1,NA,NA,UserInitials
|
||||||
|
2,ExpName2,NA,NA,UserInitials
|
||||||
|
3,ExpName3,NA,NA,UserInitials
|
||||||
|
4,ExpName4,NA,NA,UserInitials
|
||||||
|
EOF
|
||||||
|
|
||||||
# Enter REMc directory to run the scripts there
|
# Enter REMc directory to run the scripts there
|
||||||
pushd "$QHTCP_DIR/REMc" || return 1
|
pushd "$QHTCP_DIR/REMc" || return 1
|
||||||
|
|
||||||
r_join_interact &&
|
# Run each submodule
|
||||||
java_jingyu_extract &&
|
for s in "${submodules[@]}"; do "$s"; done
|
||||||
r_add_shift_values &&
|
|
||||||
r_heat_maps_zscores &&
|
|
||||||
r_heat_maps_homology &&
|
|
||||||
py_gtf &&
|
|
||||||
r_compile_gtf
|
|
||||||
|
|
||||||
popd || return 1
|
popd || return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# @section Submodules
|
||||||
|
# @description Submodules provide functionality to modules and are reusable between modules
|
||||||
|
# Use a submodule when:
|
||||||
|
# * Calling external scripts
|
||||||
|
# * Performing repetitive tasks
|
||||||
|
# *
|
||||||
|
#
|
||||||
|
|
||||||
|
submodule r_join_interact
|
||||||
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||||
r_join_interact() {
|
r_join_interact() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
echo "Rscript JoinInteractExps3dev.R"
|
echo "Rscript JoinInteractExps3dev.R"
|
||||||
Rscript JoinInteractExps3dev.R
|
Rscript JoinInteractExps3dev.R
|
||||||
out_file="REMcRdy_lm_only.csv"
|
out_file="REMcRdy_lm_only.csv"
|
||||||
@@ -230,10 +393,11 @@ r_join_interact() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule java_extract
|
||||||
# @description Jingyu's REMc java utility using file input file REMcRdy_lm_only.csv
|
# @description Jingyu's REMc java utility using file input file REMcRdy_lm_only.csv
|
||||||
# and output REMcRdy_lm_only.csv-finalTable.csv
|
# and output REMcRdy_lm_only.csv-finalTable.csv
|
||||||
java_jingyu_extract() {
|
java_extract() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
classpath="jingyuJava_1_7_extractLib.jar"
|
classpath="jingyuJava_1_7_extractLib.jar"
|
||||||
out_file="REMcRdy_lm_only.csv-finalTable.csv"
|
out_file="REMcRdy_lm_only.csv-finalTable.csv"
|
||||||
|
|
||||||
@@ -253,10 +417,11 @@ java_jingyu_extract() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule r_add_shift_values
|
||||||
# @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv
|
# @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv
|
||||||
# and output "REMcWithShift.csv" for use with the REMc heat maps
|
# and output "REMcWithShift.csv" for use with the REMc heat maps
|
||||||
r_add_shift_values() {
|
r_add_shift_values() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
out_file="REMcHeatmaps/REMcWithShift.csv"
|
out_file="REMcHeatmaps/REMcWithShift.csv"
|
||||||
echo "Rscript AddShiftVals2.R"
|
echo "Rscript AddShiftVals2.R"
|
||||||
Rscript AddShiftVals2.R
|
Rscript AddShiftVals2.R
|
||||||
@@ -265,9 +430,10 @@ r_add_shift_values() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule r_heat_maps_zscores
|
||||||
# @description Execute REMcHeatmaps_zscores.R
|
# @description Execute REMcHeatmaps_zscores.R
|
||||||
r_heat_maps_zscores() {
|
r_heat_maps_zscores() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"
|
out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"
|
||||||
echo "Rscript REMcHeatmaps_zscores.R"
|
echo "Rscript REMcHeatmaps_zscores.R"
|
||||||
Rscript REMcHeatmaps_zscores.R
|
Rscript REMcHeatmaps_zscores.R
|
||||||
@@ -278,9 +444,10 @@ r_heat_maps_zscores() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule r_heat_maps_homology
|
||||||
# @description Execute REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R
|
# @description Execute REMcHeatmaps_Z_lm_wDAmPs_andHomology_221212.R
|
||||||
r_heat_maps_homology() {
|
r_heat_maps_homology() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
work_dir="REMcHeatmapsWithHomology"
|
work_dir="REMcHeatmapsWithHomology"
|
||||||
source_file="REMcHeatmaps/REMcWithShift.csv"
|
source_file="REMcHeatmaps/REMcWithShift.csv"
|
||||||
target_file="$work_dir/REMcWithShift.csv"
|
target_file="$work_dir/REMcWithShift.csv"
|
||||||
@@ -307,9 +474,10 @@ r_heat_maps_homology() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
submodule py_gtf
|
||||||
# @description Perform GTF
|
# @description Perform GTF
|
||||||
py_gtf() {
|
py_gtf() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
process_dir="GTF/Process"
|
process_dir="GTF/Process"
|
||||||
function_dir="GTF/Function"
|
function_dir="GTF/Function"
|
||||||
component_dir="GTF/Component"
|
component_dir="GTF/Component"
|
||||||
@@ -325,7 +493,7 @@ py_gtf() {
|
|||||||
# @description Not sure what to name this
|
# @description Not sure what to name this
|
||||||
# @arg $1 string directory name
|
# @arg $1 string directory name
|
||||||
_process() {
|
_process() {
|
||||||
echo "Running: ${FUNCNAME[0]}" "$@"
|
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||||
pushd "$1" || return 1
|
pushd "$1" || return 1
|
||||||
|
|
||||||
shopt -s nullglob
|
shopt -s nullglob
|
||||||
@@ -350,6 +518,27 @@ py_gtf() {
|
|||||||
popd || return 1
|
popd || return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# @description Perl analyze submodule
|
||||||
|
# @arg $1 string "Set 1"
|
||||||
|
# @arg $@ string
|
||||||
|
pl_analyze() {
|
||||||
|
:
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pl_terms2tsv() {
|
||||||
|
:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Perform operations in each directory
|
# Perform operations in each directory
|
||||||
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
||||||
set1="ORF_List_Without_DAmPs.txt"
|
set1="ORF_List_Without_DAmPs.txt"
|
||||||
@@ -361,141 +550,56 @@ py_gtf() {
|
|||||||
|
|
||||||
# @description Compile GTF in R
|
# @description Compile GTF in R
|
||||||
# Announces the command, then runs `Rscript CompileGTF.R`; the script path is
# relative, so it is resolved against the current working directory. Rscript
# is the last command, so its exit status is the function's status.
# Each line below appears twice: this is a side-by-side diff, and the only
# difference between the two sides is that the entry log line moves from
# `echo` to `debug`.
r_compile_gtf() {
|
r_compile_gtf() {
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
echo "Rscript CompileGTF.R"
|
echo "Rscript CompileGTF.R"
|
||||||
Rscript CompileGTF.R
|
Rscript CompileGTF.R
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# @description Installs dependencies for the workflow
|
|
||||||
install_dependencies() {
|
|
||||||
echo "Running: ${FUNCNAME[0]}"
|
|
||||||
|
|
||||||
# Install system-wide dependencies
|
|
||||||
echo "Installing system dependencies"
|
|
||||||
case "$(uname -s)" in
|
|
||||||
Linux*|CYGWIN*|MINGW*)
|
|
||||||
ask "Detected Linux platform, continue?" || return 1
|
|
||||||
echo "You may be prompted for your sudo password to install system packages"
|
|
||||||
if hash dnf &>/dev/null; then
|
|
||||||
sudo dnf install graphviz pandoc pdftk-java gd-devel
|
|
||||||
elif hash apt &>/dev/null; then
|
|
||||||
sudo apt install graphviz pandoc pdftk-java libgd-dev
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
Darwin*)
|
|
||||||
ask "Detected Mac platform, continue?" || return 1
|
|
||||||
export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
|
|
||||||
curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh|bash
|
|
||||||
brew install graphiz
|
|
||||||
brew install gd
|
|
||||||
brew install pdftk-java
|
|
||||||
brew install pandoc
|
|
||||||
cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Your system could not be detected, please install dependencies manually"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Install perl CPAN modules
|
|
||||||
echo "Installing perl CPAN modules"
|
|
||||||
echo "cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder"
|
|
||||||
cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder
|
|
||||||
|
|
||||||
# Install R packages
|
|
||||||
echo "Installing R packages"
|
|
||||||
Rscript -e 'install.packages(c(\
|
|
||||||
"BiocManager", \
|
|
||||||
"ontologyIndex" \
|
|
||||||
"ggrepel" \
|
|
||||||
"tidyverse" \
|
|
||||||
"sos" \
|
|
||||||
"openxlsx" \
|
|
||||||
"ggplot2" \
|
|
||||||
"plyr" \
|
|
||||||
"extrafont" \
|
|
||||||
"gridExtra" \
|
|
||||||
"gplots" \
|
|
||||||
"stringr" \
|
|
||||||
"plotly" \
|
|
||||||
"ggthemes" \
|
|
||||||
"pandoc" \
|
|
||||||
"rmarkdown" \
|
|
||||||
), dep=TRUE, repos="https://cloud.r-project.org")'
|
|
||||||
Rscript -e 'BiocManager::install("org.Sc.sgd.db")'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# @internal
|
|
||||||
ask() {
|
|
||||||
declare response
|
|
||||||
(( YES )) && return 0
|
|
||||||
read -r -p "$* [y/N]: " response
|
|
||||||
[[ ${response,,} =~ ^(yes|y)$ ]]
|
|
||||||
}
|
|
||||||
# @internal
|
|
||||||
err() { echo "Error: $*" >&2; }
|
|
||||||
# @internal
|
|
||||||
ask_pn() {
|
|
||||||
declare -g PROJECT_NAME
|
|
||||||
read -r -p "Enter a full project name (ex. ${PROJECT_PREFIX}_PROJECT_NAME): " PROJECT_NAME
|
|
||||||
}
|
|
||||||
# @internal
|
|
||||||
debug() { (( DEBUG )) && echo "Debug: $*"; }
|
|
||||||
|
|
||||||
|
|
||||||
# @description The main loop of script-run-workflow
|
# @description The main loop of script-run-workflow
|
||||||
# May eventually need to add git ops
|
# May eventually need to add git ops
|
||||||
# Passes on arguments
|
# Passes on arguments
|
||||||
# Most variables in main() are user configurable or can be overridden by env
|
# Most variables in main() are user configurable or can be overridden by env
|
||||||
main() {
|
main() {
|
||||||
echo "Running: ${FUNCNAME[0]}" "$@"
|
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||||
|
|
||||||
# Where are we located?
|
# Where are we located?
|
||||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
|
||||||
# Set the automatic project directory prefix
|
# Set the automatic project directory prefix
|
||||||
PROJECT_PREFIX="$(whoami)_$(date +%y_%m_%d)"
|
PROJECT_PREFIX="$(whoami)_$(date +%y_%m_%d)"
|
||||||
|
# Validate a project name against the PROJECT_PREFIX layout: at least one
# character, then _NN_NN_NN_ (three two-digit groups), then at least one more
# character. Returns 0 on a match, non-zero otherwise.
san() {
  local date_part='_[0-9]{2}_[0-9]{2}_[0-9]{2}_'
  [[ $1 =~ .+${date_part}.+ ]]
}
|
||||||
# When adding a module, it also should be added to this list
|
|
||||||
ALL_MODULES=(
|
|
||||||
install_dependencies
|
|
||||||
init_job
|
|
||||||
easy
|
|
||||||
ezview
|
|
||||||
qhtcp
|
|
||||||
)
|
|
||||||
|
|
||||||
declare -a PROJECT_NAMES=() # this array will hold all of the projects to run
|
declare -ag PROJECTS=() # this array will hold all of the projects for this run
|
||||||
[[ $# -eq 1 ]] && PROJECT_NAMES+=("$1") # easy way to run on single dir
|
|
||||||
[[ $# -ge 2 ]] && parse_input "$@" # parse arguments with getopt
|
|
||||||
|
|
||||||
# Prompt user for the PROJECT_NAME if we still don't have one
|
parse_input "$@" # parse arguments with getopt
|
||||||
if [[ ${#PROJECT_NAMES[@]} -eq 0 ]]; then # still allows for environment overrides
|
|
||||||
|
# Prompt user for the PROJECT if we still don't have one
|
||||||
|
if [[ ${#PROJECTS[@]} -eq 0 ]]; then # still allows for environment overrides
|
||||||
ask_pn
|
ask_pn
|
||||||
PROJECT_NAMES+=("$PROJECT_NAME")
|
PROJECTS+=("$PROJECT")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Sanitize PROJECT_NAMES
|
for i in "${!PROJECTS[@]}"; do
|
||||||
# This regex should match PROJECT_PREFIX
|
if ! san "${PROJECTS[i]}"; then
|
||||||
san() { [[ $1 =~ .+_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_.+ ]]; }
|
echo "Project name ${PROJECTS[i]} is invalid"
|
||||||
for i in "${!PROJECT_NAMES[@]}"; do
|
|
||||||
if ! san "${PROJECT_NAME[i]}"; then
|
|
||||||
echo "Project name ${PROJECT_NAME[$i]} is invalid"
|
|
||||||
echo "Enter a replacement"
|
echo "Enter a replacement"
|
||||||
ask_pn
|
ask_pn
|
||||||
san "$PROJECT_NAME" || (echo "RTFM"; return 1)
|
san "$PROJECT" || (echo "RTFM"; return 1)
|
||||||
PROJECT_NAME[i]="$PROJECT_NAME"
|
PROJECTS[i]="$PROJECT"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
SCANS_DIR="${SCANS_DIR:-"/mnt/data/ExpJobs"}" # TODO propose changing this to something else
|
SCANS_DIR="${SCANS_DIR:-"/mnt/data/ExpJobs"}" # TODO propose changing this to something else
|
||||||
|
|
||||||
# If we don't catch with getopt or env, run all
|
# If we don't catch with getopt or env, run all
|
||||||
[[ ${#MODULES[@]} -eq 0 ]] && MODULES=("${ALL_MODULES[@]}")
|
if [[ ${#INCLUDE_MODULES[@]} -eq 0 ]]; then
|
||||||
|
MODULES=("${ALL_MODULES[@]}")
|
||||||
|
else
|
||||||
|
MODULES=("${INCLUDE_MODULES[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
# Exclude modules overrides include
|
# Exclude modules from --exclude
|
||||||
arr=()
|
arr=()
|
||||||
for m in "${MODULES[@]}"; do
|
for m in "${MODULES[@]}"; do
|
||||||
[[ " ${EXCLUDE_MODULES[*]} " =~ [[:space:]]${m}[[:space:]] ]] || arr+=("$m")
|
[[ " ${EXCLUDE_MODULES[*]} " =~ [[:space:]]${m}[[:space:]] ]] || arr+=("$m")
|
||||||
@@ -508,15 +612,15 @@ main() {
|
|||||||
if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
|
if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
|
||||||
echo "Module $m not in the module list"
|
echo "Module $m not in the module list"
|
||||||
echo "Available modules: ${ALL_MODULES[*]}"
|
echo "Available modules: ${ALL_MODULES[*]}"
|
||||||
read -r -p "Enter replacement name: " MODULE
|
read -r -p "Enter replacement module name: " MODULE
|
||||||
! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || (echo "RTFM"; return 1)
|
! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULE}[[:space:]] ]] || (echo "RTFM"; return 1)
|
||||||
MODULES[i]="$MODULE"
|
MODULES[i]="$MODULE"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Loop over projects
|
# Loop over projects
|
||||||
for PROJECT_NAME in "${PROJECT_NAMES[@]}"; do
|
for PROJECT in "${PROJECTS[@]}"; do
|
||||||
SCAN_DIR="$SCANS_DIR/$PROJECT_NAME"
|
SCAN_DIR="$SCANS_DIR/$PROJECT"
|
||||||
|
|
||||||
# Run selected modules
|
# Run selected modules
|
||||||
for m in "${MODULES[@]}"; do
|
for m in "${MODULES[@]}"; do
|
||||||
|
|||||||
Reference in New Issue
Block a user