Some more refactoring and cleanup

This commit is contained in:
2024-08-02 18:48:09 -04:00
parent 5ca6ef8f01
commit b5aaf9ffb4
5 changed files with 45 additions and 2770 deletions

View File

@@ -74,7 +74,7 @@ print_help() {
script-run-workflow [[OPTION] [VALUE]]... script-run-workflow [[OPTION] [VALUE]]...
Some options (--project, --include, --exclude) can be passed multiple times or Some options (--project, --include, --exclude) can be passed multiple times or
by using comma deliminated strings (see EXAMPLES below) by using comma-separated strings (see EXAMPLES below)
OPTIONS: OPTIONS:
--project, -p PROJECT --project, -p PROJECT
@@ -117,6 +117,7 @@ print_help() {
script-run-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]} script-run-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]}
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
script-run-workflow --project=${PROJECT_PREFIX}_MY_PROJECT --submodule ${ALL_SUBMODULES[2]} \"/path/to/genefile.txt,/path/to/output/dir\" --submodule ${ALL_SUBMODULES[3]} \"/path/to/sgofile\"
EOF EOF
} }
@@ -326,7 +327,7 @@ print_header() {
# Let user choose project(s) # Let user choose project(s)
if [[ -z ${PROJECTS[*]} ]]; then if [[ -z ${PROJECTS[*]} ]]; then
num=$((${#projects[@]})) num=$((${#projects[@]}))
echo "Enter comma delimited project #'s (from list) to analyze" echo "Enter a comma-separated list of project numbers to analyze"
read -r -p "Or hit Enter to add a new project" response read -r -p "Or hit Enter to add a new project" response
[[ -z $response ]] && ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}") [[ -z $response ]] && ask_pn && PROJECTS+=("${ADD_PROJECTS[@]}")
((YES)) || read -r -p "Hit enter for default ($num): " response ((YES)) || read -r -p "Hit enter for default ($num): " response
@@ -346,7 +347,7 @@ print_header() {
done done
if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} ]]; then
echo "Enter module #'s to run (by #, comma delimited)" echo "Enter a comma-separated list of modules to run"
((YES)) || read -r -p "Hit Enter for all (default) or '0' for none: " response ((YES)) || read -r -p "Hit Enter for all (default) or '0' for none: " response
if [[ -n $response && $response -ne 0 ]]; then if [[ -n $response && $response -ne 0 ]]; then
IFS=',' read -ra arr <<< "$response" IFS=',' read -ra arr <<< "$response"
@@ -359,7 +360,8 @@ print_header() {
if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then if [[ -z ${MODULES[*]} && -z ${EXCLUDE_MODULES[*]} && -z ${SUBMODULES[*]} ]]; then
while :; do while :; do
echo "Enter a submodule followed by its arguments as a case delimited string in quotes" echo "Enter a submodule followed by its arguments as a comma-separated string"
echo "Quote your string if there are any whitespaces"
echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\"" echo "Example: ${ALL_SUBMODULES[0]} \"arg1,arg2,arg3...\""
((YES)) || read -r -p "Or hit Enter to continue: " response ((YES)) || read -r -p "Or hit Enter to continue: " response
[[ -z $response ]] && break [[ -z $response ]] && break
@@ -520,6 +522,7 @@ init_project() {
if ask "You can edit this file in the qhtcp module"; then if ask "You can edit this file in the qhtcp module"; then
cat <<-EOF > "$STUDY_INFO_FILE" cat <<-EOF > "$STUDY_INFO_FILE"
"ExpNumb","ExpLabel","BackgroundSD","ZscoreJoinSD","AnalysisBy" "ExpNumb","ExpLabel","BackgroundSD","ZscoreJoinSD","AnalysisBy"
EOF EOF
fi fi
fi fi
@@ -943,15 +946,10 @@ qhtcp() {
module remc module remc
# @section GTF # @section remc
# @description GTF module for QHTCP # @description remc module for QHTCP
# TODO which components of remc can be parallelized? # TODO
# The submodules in remc really like to be run from the REMc dir # * Which components can be parallelized?
# so we pop in and out for now
# NOTE the remc modules could use some love
# * Don't cd within scripts, it's confusing
# * Use arguments to pass configuration variables
# * This allows us to abstract the program away in script-run-workflow and treat it like a module
# @arg $1 string studyInfo file # @arg $1 string studyInfo file
remc() { remc() {
debug "Running: ${FUNCNAME[0]} $*" debug "Running: ${FUNCNAME[0]} $*"
@@ -992,14 +990,14 @@ module gtf
gtf() { gtf() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
gtf_out_dir="${1:-$QHTCP_PROJECT_DIR/out/gtf}" gtf_out_dir="${1:-$QHTCP_PROJECT_DIR/out/gtf}"
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
orf_list="${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}"
process_dir="$gtf_out_dir/process" process_dir="$gtf_out_dir/process"
function_dir="$gtf_out_dir/function" function_dir="$gtf_out_dir/function"
component_dir="$gtf_out_dir/component" component_dir="$gtf_out_dir/component"
gene_association_sgd="${2:-"$APPS_DIR/perl/gene_association.sgd"}"
gene_ontology_obo="${3:-"$APPS_DIR/perl/gene_ontology_edit.obo"}"
orf_list="${4:-"$APPS_DIR/perl/ORF_List_Without_DAmPs.txt"}"
py_gtf_dcon \ py_gtf_dcon \
"$process_dir" \ "$process_dir" \
"$gtf_out_dir" "$gtf_out_dir"
@@ -1040,26 +1038,26 @@ module gta
# TODO # TODO
# * # *
# * # *
# @set GTA_OUT_DIR string The GTA output results dir # @arg $1 string output directory
# @set all_sgd_terms_csv string The all_SGD_GOTerms_for_QHTCPtk.csv file # @arg $2 string gene_association.sgd
# @set sgd_terms_tfile string The go_terms.tab file # @arg $3 string gene_ontology_edit.obo
# @set sgd_features_file string The gene_association.sgd file # @arg $4 string go_terms.tab
# @set gene_ontology_file string The gene_ontology_edit.obo file # @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv
# @set zscores_file string The ZScores_interaction.csv file # @arg $6 string zscores_interaction.csv
gta() { gta() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
GTA_OUT_DIR="$QHTCP_PROJECT_DIR/gta" gta_out_dir="${1:-"$QHTCP_PROJECT_DIR/gta"}"
all_sgd_terms_csv="$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv" gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
sgd_terms_tfile="$APPS_DIR/r/go_terms.tab" gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
sgd_features_file="$APPS_DIR/r/gene_association.sgd" sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
gene_ontology_file="$APPS_DIR/r/gene_ontology_edit.obo" all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
zscores_file="zscores/zscores_interaction.csv" zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results
# Sets STUDIES_NUM and NUM_STUDIES # Sets STUDIES_NUM and NUM_STUDIES
get_studies "$STUDY_INFO_FILE" get_studies "$STUDY_INFO_FILE"
[[ -d $GTA_OUT_DIR ]] || mkdir "$GTA_OUT_DIR" [[ -d $gta_out_dir ]] || mkdir "$gta_out_dir"
# Loop over the array and create pairwise arrays # Loop over the array and create pairwise arrays
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
@@ -1088,13 +1086,13 @@ gta() {
for s in "${STUDIES_NUMS[@]}"; do for s in "${STUDIES_NUMS[@]}"; do
zscores_file="$QHTCP_PROJECT_DIR/Exp$s/$zscores_file" zscores_file="$QHTCP_PROJECT_DIR/Exp$s/$zscores_file"
if [[ -f $zscores_file ]]; then if [[ -f $zscores_file ]]; then
mkdir "$GTA_OUT_DIR/Exp$s" mkdir "$gta_out_dir/Exp$s"
r_gta \ r_gta \
"Exp$s" \ "Exp$s" \
"$zscores_file" \ "$zscores_file" \
"$sgd_terms_tfile" \ "$sgd_terms_tfile" \
"$sgd_features_file" \ "$gene_association_sgd" \
"$GTA_OUT_DIR" "$gta_out_dir"
fi fi
done done
@@ -1102,7 +1100,7 @@ gta() {
for combo in "${study_combos[@]}"; do for combo in "${study_combos[@]}"; do
# Split on comma and assign to array # Split on comma and assign to array
IFS=',' read -ra studies <<< "$combo" IFS=',' read -ra studies <<< "$combo"
r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$GTA_OUT_DIR" r_gta_pairwiselk "${studies[0]}" "${studies[1]}" "$STUDY_INFO_FILE" "$gta_out_dir"
done done
# All studies # All studies
@@ -1110,7 +1108,7 @@ gta() {
# are required # are required
r_gta_heatmaps \ r_gta_heatmaps \
"$STUDY_INFO_FILE" \ "$STUDY_INFO_FILE" \
"$gene_ontology_file" \ "$gene_ontology_obo" \
"$sgd_terms_tfile" \ "$sgd_terms_tfile" \
"$all_sgd_terms_csv" \ "$all_sgd_terms_csv" \
"$zscores_file" \ "$zscores_file" \
@@ -1121,14 +1119,12 @@ gta() {
# @section Submodules # @section Submodules
# @description Submodules provide functionality to modules and should be reusable # @description Submodules are shell wrappers for workflow components in external languages.
# A submodule only runs by default if called by a module # Submodules:
# Use a submodule for: # * Allow scripts to be called by the main workflow script using input
# * Calling external scripts # and output arguments as a translation mechanism.
# * Performing repetitive tasks # * Only run by default if called by a module.
# * Generalizing code # * Can be called directly with their arguments as a comma-separated string
# * Functions you do not want to perform by default (submodules should be called modules)
# * Should not call cd or pushd (let module dictate)
submodule r_gta submodule r_gta
@@ -1320,6 +1316,10 @@ r_interactions() {
submodule r_join_interactions submodule r_join_interactions
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
# Output files:
# * REMcRdy_lm_only.csv
# * Shift_only.csv
# * parameters.csv
# @arg $1 string The output directory # @arg $1 string The output directory
# @arg $2 string The sd value # @arg $2 string The sd value
# @arg $3 string The studyInfo file # @arg $3 string The studyInfo file
@@ -1552,19 +1552,15 @@ get_studies() {
# submodule choose_easy_results_dir # # submodule choose_easy_results_dir #
# # @description Chooses an EASY scans directory if the information is undefined # # @description Chooses an EASY scans directory if the information is undefined
# # TODO: Standardize EASY output, it's hard to understand # # TODO Standardize EASY output, it's hard to understand
# # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring # # TODO eventually we could run this on multiple results dirs simultaneously with some refactoring
# # @exitcode 0 if successfully choose an EASY results dir # # @exitcode 0 if successfully choose an EASY results dir
# # @set EASY_RESULTS_DIR string The working EASY output directory # # @set EASY_RESULTS_DIR string The working EASY output directory
# choose_easy_results_dir() { # choose_easy_results_dir() {
# debug "Running: ${FUNCNAME[0]}" # debug "Running: ${FUNCNAME[0]}"
# # Always backup existing output # # Always backup existing output
# # This would happen if you ran the same experiment twice in one day, for instance # # This would happen if you ran the same experiment twice in one day, for instance
# [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR" # [[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR"
# if [[ ! -d $EASY_RESULTS_DIR ]]; then # if [[ ! -d $EASY_RESULTS_DIR ]]; then
# debug "mkdir $EASY_RESULTS_DIR" # debug "mkdir $EASY_RESULTS_DIR"
# mkdir "$EASY_RESULTS_DIR" # mkdir "$EASY_RESULTS_DIR"
@@ -1572,7 +1568,6 @@ get_studies() {
# err "Could not create $EASY_RESULTS_DIR" # err "Could not create $EASY_RESULTS_DIR"
# return 0 # return 0
# fi # fi
# # echo "Hit enter to use the default EASY results directory: $default_easy_results_dir" # # echo "Hit enter to use the default EASY results directory: $default_easy_results_dir"
# # if ! (( YES )); then # # if ! (( YES )); then
# # read -r -p "Or enter a custom directory name, example: $PROJECT" dirname # # read -r -p "Or enter a custom directory name, example: $PROJECT" dirname

View File

@@ -1,14 +0,0 @@
// Copyright 2024 Bryan C. Roessler
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

View File

@@ -1,4 +0,0 @@
ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy
1,ExpName1,NA,NA,UserInitials
2,ExpName2,NA,NA,UserInitials
3,ExpName3,NA,NA,UserInitials
1 ExpNumb ExpLabel BackgroundSD ZscoreJoinSD AnalysisBy
2 1 ExpName1 NA NA UserInitials
3 2 ExpName2 NA NA UserInitials
4 3 ExpName3 NA NA UserInitials

Binary file not shown.