Some more refactoring and cleanup
This commit is contained in:
@@ -74,7 +74,7 @@ print_help() {
|
||||
script-run-workflow [[OPTION] [VALUE]]...
|
||||
|
||||
Some options (--project, --include, --exclude) can be passed multiple times or
|
||||
by using comma deliminated strings (see EXAMPLES below)
|
||||
by using comma-separated strings (see EXAMPLES below)
|
||||
|
||||
OPTIONS:
|
||||
--project, -p PROJECT
|
||||
@@ -117,6 +117,7 @@ print_help() {
|
||||
script-run-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]}
|
||||
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
|
||||
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
|
||||
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT --submodule ${ALL_SUBMODULES[2]} \"/path/to/genefile.txt,/path/to/output/dir\" --submodule ${ALL_SUBMODULES[3]} \"/path/to/sgofile\"
|
||||
EOF
|
||||
}
|
||||
|
||||
@@ -326,7 +327,7 @@ print_header() {
|
||||
# Let user choose project(s)
|
||||
if [[ -z ${PROJECTS[*]} ]]; then
|
||||
num=$((${#projects[@]}))
|
||||
echo "Enter comma delimited project #'s (from list) to analyze"
|
||||
echo "Enter a comma-separated list of project numbers to analyze"
|
||||
read -r -p "Or hit Enter to add a new project" response
|
||||
[[ -z $response ]] && ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}")
|
||||
((YES)) || read -r -p "Hit enter for default ($num): " response
|
||||
@@ -346,7 +347,7 @@ print_header() {
|
||||
done
|
||||
|
||||
if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then
|
||||
echo "Enter module #'s to run (by #, comma delimited)"
|
||||
echo "Enter a comma-separated list of modules to run"
|
||||
((YES)) || read -r -p "Hit Enter for all (default) or '0' for none: " response
|
||||
if [[ -n $response && $response -ne 0 ]]; then
|
||||
IFS=',' read -ra arr <<< "$response"
|
||||
@@ -359,7 +360,8 @@ print_header() {
|
||||
|
||||
if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then
|
||||
while :; do
|
||||
echo "Enter a submodule followed by its arguments as a case delimited string in quotes"
|
||||
echo "Enter a submodule followed by its arguments as a comma-separated string"
|
||||
echo "Quote your string if there are any whitespaces"
|
||||
echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\""
|
||||
((YES)) || read -r -p "Or hit Enter to continue: " response
|
||||
[[ -z $response ]] && break
|
||||
@@ -520,6 +522,7 @@ init_project() {
|
||||
if ask "You can edit this file in the qhtcp module"; then
|
||||
cat <<-EOF > "$STUDY_INFO_FILE"
|
||||
"ExpNumb","ExpLabel","BackgroundSD","ZscoreJoinSD","AnalysisBy"
|
||||
|
||||
EOF
|
||||
fi
|
||||
fi
|
||||
@@ -943,15 +946,10 @@ qhtcp() {
|
||||
|
||||
|
||||
module remc
|
||||
# @section GTF
|
||||
# @description GTF module for QHTCP
|
||||
# TODO which components of remc can be parallelized?
|
||||
# The submodules in remc really like to be run from the REMc dir
|
||||
# so we pop in and out for now
|
||||
# NOTE the remc modules could use some love
|
||||
# * Don't cd within scripts, it's confusing
|
||||
# * Use arguments to pass configuration variables
|
||||
# * This allows us to abstract the program away in script-run-workflow and treat it like a module
|
||||
# @section remc
|
||||
# @description remc module for QHTCP
|
||||
# TODO
|
||||
# * Which components can be parallelized?
|
||||
# @arg $1 string studyInfo file
|
||||
remc() {
|
||||
debug "Running: ${FUNCNAME[0]} $*"
|
||||
@@ -992,14 +990,14 @@ module gtf
|
||||
gtf() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
gtf_out_dir="${1:-$QHTCP_PROJECT_DIR/out/gtf}"
|
||||
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
||||
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
|
||||
orf_list="${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}"
|
||||
|
||||
process_dir="$gtf_out_dir/process"
|
||||
function_dir="$gtf_out_dir/function"
|
||||
component_dir="$gtf_out_dir/component"
|
||||
|
||||
gene_association_sgd="${2:-"$APPS_DIR/perl/gene_association.sgd"}"
|
||||
gene_ontology_obo="${3:-"$APPS_DIR/perl/gene_ontology_edit.obo"}"
|
||||
orf_list="${4:-"$APPS_DIR/perl/ORF_List_Without_DAmPs.txt"}"
|
||||
|
||||
py_gtf_dcon \
|
||||
"$process_dir" \
|
||||
"$gtf_out_dir"
|
||||
@@ -1040,26 +1038,26 @@ module gta
|
||||
# TODO
|
||||
# *
|
||||
# *
|
||||
# @set GTA_OUT_DIR string The GTA output results dir
|
||||
# @set all_sgd_terms_csv string The all_SGD_GOTerms_for_QHTCPtk.csv file
|
||||
# @set sgd_terms_tfile string The go_terms.tab file
|
||||
# @set sgd_features_file string The gene_association.sgd file
|
||||
# @set gene_ontology_file string The gene_ontology_edit.obo file
|
||||
# @set zscores_file string The ZScores_interaction.csv file
|
||||
# @arg $1 string output directory
|
||||
# @arg $2 string gene_association.sgd
|
||||
# @arg $3 string gene_ontology_edit.obo
|
||||
# @arg $4 string go_terms.tab
|
||||
# @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv
|
||||
# @arg $6 string zscores_interaction.csv
|
||||
gta() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
GTA_OUT_DIR="$QHTCP_PROJECT_DIR/gta"
|
||||
all_sgd_terms_csv="$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"
|
||||
sgd_terms_tfile="$APPS_DIR/r/go_terms.tab"
|
||||
sgd_features_file="$APPS_DIR/r/gene_association.sgd"
|
||||
gene_ontology_file="$APPS_DIR/r/gene_ontology_edit.obo"
|
||||
zscores_file="zscores/zscores_interaction.csv"
|
||||
gta_out_dir="${1:-"$QHTCP_PROJECT_DIR/gta"}"
|
||||
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
||||
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
|
||||
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
|
||||
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
|
||||
zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results
|
||||
|
||||
# Sets STUDIES_NUM and NUM_STUDIES
|
||||
get_studies "$STUDY_INFO_FILE"
|
||||
|
||||
[[ -d $GTA_OUT_DIR ]] || mkdir "$GTA_OUT_DIR"
|
||||
[[ -d $gta_out_dir ]] || mkdir "$gta_out_dir"
|
||||
|
||||
# Loop over the array and create pairwise arrays
|
||||
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
|
||||
@@ -1088,13 +1086,13 @@ gta() {
|
||||
for s in "${STUDIES_NUMS[@]}"; do
|
||||
zscores_file="$QHTCP_PROJECT_DIR/Exp$s/$zscores_file"
|
||||
if [[ -f $zscores_file ]]; then
|
||||
mkdir "$GTA_OUT_DIR/Exp$s"
|
||||
mkdir "$gta_out_dir/Exp$s"
|
||||
r_gta \
|
||||
"Exp$s" \
|
||||
"$zscores_file" \
|
||||
"$sgd_terms_tfile" \
|
||||
"$sgd_features_file" \
|
||||
"$GTA_OUT_DIR"
|
||||
"$gene_association_sgd" \
|
||||
"$gta_out_dir"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1102,7 +1100,7 @@ gta() {
|
||||
for combo in "${study_combos[@]}"; do
|
||||
# Split on comma and assign to array
|
||||
IFS=',' read -ra studies <<< "$combo"
|
||||
r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$GTA_OUT_DIR"
|
||||
r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$gta_out_dir"
|
||||
done
|
||||
|
||||
# All studies
|
||||
@@ -1110,7 +1108,7 @@ gta() {
|
||||
# are required
|
||||
r_gta_heatmaps \
|
||||
"$STUDY_INFO_FILE" \
|
||||
"$gene_ontology_file" \
|
||||
"$gene_ontology_obo" \
|
||||
"$sgd_terms_tfile" \
|
||||
"$all_sgd_terms_csv" \
|
||||
"$zscores_file" \
|
||||
@@ -1121,14 +1119,12 @@ gta() {
|
||||
|
||||
|
||||
# @section Submodules
|
||||
# @description Submodules provide functionality to modules and should be reusable
|
||||
# A submodule only runs by default if called by a module
|
||||
# Use a submodule for:
|
||||
# * Calling external scripts
|
||||
# * Performing repetitive tasks
|
||||
# * Generalizing code
|
||||
# * Functions you do not want to perform by default (submodules should be called modules)
|
||||
# * Should not call cd or pushd (let module dictate)
|
||||
# @description Submodules are shell wrappers for workflow components in external languages.
|
||||
# Submodules:
|
||||
# * Allow scripts to be called by the main workflow script using input
|
||||
# and output arguments as a translation mechanism.
|
||||
# * Only run by default if called by a module.
|
||||
# * Can be called directly with their arguments as a comma-separated string
|
||||
|
||||
|
||||
submodule r_gta
|
||||
@@ -1320,6 +1316,10 @@ r_interactions() {
|
||||
|
||||
submodule r_join_interactions
|
||||
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||
# Output files:
|
||||
# * REMcRdy_lm_only.csv
|
||||
# * Shift_only.csv
|
||||
# * parameters.csv
|
||||
# @arg $1 string The output directory
|
||||
# @arg $2 string The sd value
|
||||
# @arg $3 string The studyInfo file
|
||||
@@ -1552,19 +1552,15 @@ get_studies() {
|
||||
|
||||
# submodule choose_easy_results_dir #
|
||||
# # @description Chooses an EASY scans directory if the information is undefined
|
||||
# # TODO: Standardize EASY output, it's hard to understand
|
||||
# # TODO Standardize EASY output, it's hard to understand
|
||||
# # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring
|
||||
# # @exitcode 0 if an EASY results dir was successfully chosen
|
||||
# # @set EASY_RESULTS_DIR string The working EASY output directory
|
||||
# choose_easy_results_dir() {
|
||||
# debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
|
||||
|
||||
# # Always backup existing output
|
||||
# # This would happen if you ran the same experiment twice in one day, for instance
|
||||
# [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR"
|
||||
|
||||
# if [[ ! -d $EASY_RESULTS_DIR ]]; then
|
||||
# debug "mkdir $EASY_RESULTS_DIR"
|
||||
# mkdir "$EASY_RESULTS_DIR"
|
||||
@@ -1572,7 +1568,6 @@ get_studies() {
|
||||
# err "Could not create $EASY_RESULTS_DIR"
|
||||
# return 0
|
||||
# fi
|
||||
|
||||
# # echo "Hit enter to use the default EASY results directory: $default_easy_results_dir"
|
||||
# # if ! (( YES )); then
|
||||
# # read -r -p "Or enter a custom directory name, example: $PROJECT" dirname
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
// Copyright 2024 Bryan C. Roessler
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
|
||||
1,ExpName1,NA,NA,UserInitials
|
||||
2,ExpName2,NA,NA,UserInitials
|
||||
3,ExpName3,NA,NA,UserInitials
|
||||
|
BIN
workflow/templates/exp/ZScores/.DS_Store
vendored
BIN
workflow/templates/exp/ZScores/.DS_Store
vendored
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user