Rollup before removing NAs from joinInteractExps.R
This commit is contained in:
@@ -139,7 +139,7 @@ print_help() {
|
||||
# `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
|
||||
# @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
|
||||
# @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
|
||||
# @option -w<value> | --wrapper=<value> Requires two arguments: the name of the wrapper and its arguments, can be passed multiple times
|
||||
# @option -w<value> | --wrapper=<value> One or more wrappers and their arguments to run, can be passed multiple times or with a comma-separated string
|
||||
# @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
|
||||
# @option --markdown Generate the shdoc markdown file for this program
|
||||
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
||||
@@ -163,20 +163,22 @@ parse_input() {
|
||||
case $1 in
|
||||
--project|-p)
|
||||
shift
|
||||
declare -ga PROJECTS
|
||||
IFS=',' read -ra PROJECTS <<< "$1"
|
||||
;;
|
||||
--module|-m)
|
||||
shift
|
||||
declare -ga MODULES
|
||||
IFS=',' read -ra MODULES <<< "$1"
|
||||
;;
|
||||
--wrapper|-w)
|
||||
shift
|
||||
IFS=',' read -ra WRAPPERS <<< "$1"
|
||||
shift
|
||||
declare -ga WRAPPERS
|
||||
IFS=',' read -ra WRAPPERS <<< "$1"
|
||||
;;
|
||||
--nomodule|-n)
|
||||
shift
|
||||
declare -ga EXCLUDE_MODULES
|
||||
IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
|
||||
;;
|
||||
--markdown)
|
||||
@@ -336,6 +338,10 @@ execute() {
|
||||
|
||||
# @description Backup one or more files to an incremented .bk file
|
||||
#
|
||||
# **TODO**
|
||||
#
|
||||
# * Make backups hidden by prepending "."?
|
||||
#
|
||||
# @exitcode backup iterator max 255
|
||||
# @internal
|
||||
backup() {
|
||||
@@ -343,8 +349,8 @@ backup() {
|
||||
for f in "$@"; do
|
||||
[[ -e $f ]] || continue
|
||||
count=1
|
||||
while [[ -f $f.bk.$count ]]; do
|
||||
count=$((count++))
|
||||
while [[ -e $f.bk.$count ]]; do
|
||||
((count++))
|
||||
done
|
||||
echo "Backing up $f to $f.bk.$count"
|
||||
debug "rsync -a $f $f.bk.$count"
|
||||
@@ -525,7 +531,7 @@ interactive_header() {
|
||||
echo ""
|
||||
|
||||
# Module selection
|
||||
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then
|
||||
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
|
||||
cat <<-EOF
|
||||
${underline}Enter modules(s) to run${nounderline}
|
||||
* <Enter> for all
|
||||
@@ -554,7 +560,7 @@ interactive_header() {
|
||||
# If we're just installing dependencies, skip the rest
|
||||
[[ ${MODULES[*]} == "install_dependencies" ]] && return 0
|
||||
|
||||
# Submodule selection
|
||||
# Wrapper selection
|
||||
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
|
||||
while :; do
|
||||
cat <<-EOF
|
||||
@@ -1335,7 +1341,7 @@ qhtcp() {
|
||||
[[ -d $QHTCP_RESULTS_DIR ]] ||
|
||||
err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
|
||||
|
||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
||||
# Sets STUDIES
|
||||
study_info
|
||||
|
||||
choose_easy_results "$EASY_OUT_DIR"
|
||||
@@ -1348,22 +1354,24 @@ qhtcp() {
|
||||
# # TODO Add them all to StudiesDataArchive?
|
||||
# # Probably better to always add and remove dupes later since each invocation "counts"?
|
||||
# for f in "${EASY_RESULTS_FILES[@]}"; do
|
||||
# for s in "${STUDIES_NUMS[@]}"; do
|
||||
# for study in "${STUDIES[@]}"; do
|
||||
# read -r num sd dir <<< "$study"
|
||||
# # Trying to match old ExpFrontend formatting
|
||||
# printf "%s\t" \
|
||||
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$s" \
|
||||
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$num" \
|
||||
# "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
|
||||
# >> "$STUDIES_ARCHIVE_FILE"
|
||||
# done
|
||||
# done
|
||||
|
||||
# Run R interactions script on all studies
|
||||
for s in "${STUDIES_NUMS[@]}"; do
|
||||
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] ||
|
||||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores"
|
||||
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] ||
|
||||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc"
|
||||
r_interactions "$s"
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r num sd dir <<< "$study"
|
||||
[[ -d $dir/zscores ]] ||
|
||||
execute mkdir "$dir/zscores"
|
||||
[[ -d $dir/zscores/qc ]] ||
|
||||
execute mkdir "$dir/zscores/qc"
|
||||
r_interactions "$num" "$sd"
|
||||
done \
|
||||
&& remc \
|
||||
&& gtf \
|
||||
@@ -1384,13 +1392,12 @@ module remc
|
||||
remc() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
||||
# Sets STUDIES
|
||||
study_info
|
||||
|
||||
# If any wrappers fail the rest will not run, this is fundamental to module design
|
||||
# Remove leading && to run regardless
|
||||
r_join_interactions \
|
||||
"${STUDIES_DIRS[@]}" \
|
||||
&& java_extract \
|
||||
&& r_add_shift_values \
|
||||
&& r_create_heat_maps \
|
||||
@@ -1453,36 +1460,40 @@ module gta
|
||||
gta() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
||||
# gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
||||
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
|
||||
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
|
||||
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
|
||||
|
||||
# TODO This could be wrong, it could be in main results
|
||||
|
||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
||||
# Sets STUDIES
|
||||
study_info
|
||||
|
||||
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
|
||||
execute mkdir "$GTA_OUT_DIR"
|
||||
|
||||
# Loop over the array and create pairwise arrays
|
||||
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
|
||||
for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do
|
||||
pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}")
|
||||
for ((i=0; i<${#STUDIES[@]}; i++)); do
|
||||
for ((j=i+1; j<${#STUDIES[@]}; j++)); do
|
||||
read -r num1 _ _ <<< "${STUDIES[i]}"
|
||||
read -r num2 _ _ <<< "${STUDIES[j]}"
|
||||
pair=("$num1" "$num2")
|
||||
echo "${pair[@]}"
|
||||
done
|
||||
done
|
||||
|
||||
# Create unique pairwise combinations of study nums from dir names
|
||||
study_combos=()
|
||||
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
|
||||
for ((i=0; i<${#STUDIES[@]}; i++)); do
|
||||
# Loop through the array again
|
||||
for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do
|
||||
for ((j=0; j<${#STUDIES[@]}; j++)); do
|
||||
# If the indices are not the same
|
||||
if [ "$i" != "$j" ]; then
|
||||
# Print the unique combination
|
||||
study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}")
|
||||
read -r num1 _ _ <<< "${STUDIES[i]}"
|
||||
read -r num2 _ _ <<< "${STUDIES[j]}"
|
||||
study_combos+=("$num1,$num2")
|
||||
fi
|
||||
done
|
||||
done
|
||||
@@ -1490,11 +1501,12 @@ gta() {
|
||||
# The following are three types of studies
|
||||
|
||||
# Individual studies
|
||||
for s in "${STUDIES_NUMS[@]}"; do
|
||||
zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file"
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r num _ dir <<< "$study"
|
||||
zscores_file="$dir/zscores/zscores_interaction.csv"
|
||||
if [[ -f $zscores_file ]]; then
|
||||
mkdir "$GTA_OUT_DIR/Exp$s"
|
||||
r_gta "Exp$s" "$zscores_file"
|
||||
mkdir "$GTA_OUT_DIR/Exp$num"
|
||||
r_gta "Exp$num" "$zscores_file"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1507,6 +1519,12 @@ gta() {
|
||||
|
||||
# All studies
|
||||
# All preceding arguments are required so we can pass multiple studies
|
||||
declare -a nums
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r num _ _ <<< "$study"
|
||||
nums+=("$num")
|
||||
done
|
||||
|
||||
r_gta_heatmaps \
|
||||
"$STUDY_INFO_FILE" \
|
||||
"$gene_ontology_obo" \
|
||||
@@ -1514,7 +1532,7 @@ gta() {
|
||||
"$all_sgd_terms_csv" \
|
||||
"$QHTCP_RESULTS_DIR" \
|
||||
"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
|
||||
"${STUDIES_NUMS[@]}"
|
||||
"${nums[@]}"
|
||||
}
|
||||
|
||||
|
||||
@@ -1714,7 +1732,7 @@ wrapper r_interactions
|
||||
# @arg $3 string study info file
|
||||
# @arg $4 string SGD_features.tab
|
||||
# @arg $5 string easy/results_std.txt
|
||||
# @arg $6 string zscores directory
|
||||
# @arg $6 string output directory
|
||||
r_interactions() {
|
||||
debug "Running: ${FUNCNAME[0]} $*"
|
||||
cat <<-EOF
|
||||
@@ -1729,7 +1747,11 @@ r_interactions() {
|
||||
* Background values are reported in the results sheet and so could also be analyzed there.
|
||||
EOF
|
||||
|
||||
script="$APPS_DIR/r/interactions.R"
|
||||
declare script="$APPS_DIR/r/interactions.R"
|
||||
declare out_dir="${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}"
|
||||
|
||||
[[ -d $out_dir ]] && backup "$out_dir"
|
||||
mkdir "$out_dir"
|
||||
|
||||
execute "$RSCRIPT" "$script" \
|
||||
"$1" \
|
||||
@@ -1737,12 +1759,15 @@ r_interactions() {
|
||||
"${3:-"$STUDY_INFO_FILE"}" \
|
||||
"${4:-"$APPS_DIR/r/SGD_features.tab"}" \
|
||||
"${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
|
||||
"${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \
|
||||
"$out_dir" \
|
||||
"${@:7}" # future arguments
|
||||
|
||||
[[ -f "$out_dir/zscores_interaction.csv" ]] || (echo "$out_dir/zscores_interaction.csv does not exist"; return 1)
|
||||
}
|
||||
|
||||
|
||||
wrapper r_join_interactions
|
||||
# shellcheck disable=SC2120
|
||||
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||
#
|
||||
# TODO
|
||||
@@ -1760,20 +1785,41 @@ wrapper r_join_interactions
|
||||
# * Shift_only.csv
|
||||
# * parameters.csv
|
||||
#
|
||||
# @arg $1 string output directory
|
||||
# @arg $2 string sd value (default: 2)
|
||||
# @arg $3 string study info file
|
||||
# @arg $1 string output directory (required)
|
||||
# @arg $2 string sd value (default: 2) (required)
|
||||
# @arg $3 string study info file (required)
|
||||
# @arg $4 array studies (required)
|
||||
r_join_interactions() {
|
||||
debug "Running: ${FUNCNAME[0]} $*"
|
||||
script="$APPS_DIR/r/joinInteractExps.R"
|
||||
declare script="$APPS_DIR/r/joinInteractExps.R"
|
||||
declare -a dirs
|
||||
declare -a out_files=(
|
||||
"${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv"
|
||||
"${1:-$QHTCP_RESULTS_DIR}/Shift_only.csv"
|
||||
"${1:-$QHTCP_RESULTS_DIR}/parameters.csv"
|
||||
)
|
||||
|
||||
((DEBUG)) && declare -p
|
||||
|
||||
backup "${out_files[@]}"
|
||||
|
||||
# If user provides study dirs, use those
|
||||
if [[ $# -gt 3 ]]; then
|
||||
dirs=("${@:4}")
|
||||
else
|
||||
study_info
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r _ _ dir <<< "$study"
|
||||
dirs+=("$dir")
|
||||
done
|
||||
fi
|
||||
|
||||
execute "$RSCRIPT" "$script" \
|
||||
"${1:-$QHTCP_RESULTS_DIR}" \
|
||||
"${2:-2}" \
|
||||
"${3:-$STUDY_INFO_FILE}" \
|
||||
"${@:4:-${STUDIES_DIRS[@]}}"
|
||||
"${dirs[@]}"
|
||||
|
||||
local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv")
|
||||
for f in "${out_files[@]}"; do
|
||||
[[ -f $f ]] || (echo "$f does not exist"; return 1)
|
||||
done
|
||||
@@ -1816,6 +1862,9 @@ java_extract() {
|
||||
"${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
|
||||
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
|
||||
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
|
||||
1
|
||||
true
|
||||
true
|
||||
)
|
||||
|
||||
debug "pushd && ${java_cmd[*]} && popd"
|
||||
@@ -2021,8 +2070,7 @@ r_compile_gtf() {
|
||||
#
|
||||
# @exitcode 0 If one or more studies found
|
||||
# @exitcode 1 If no studies found
|
||||
# @set STUDIES_NUMS array contains Exp numbers
|
||||
# @set STUDIES_DIRS array contains Exp directories
|
||||
# @set STUDIES array contains array of "Exp# sd ExpDir"
|
||||
study_info() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
@@ -2116,26 +2164,28 @@ study_info() {
|
||||
fi
|
||||
|
||||
# Read study info file
|
||||
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
|
||||
STUDIES_NUMS+=("$col1")
|
||||
declare -ga STUDIES
|
||||
while IFS=',' read -r num _ sd _; do
|
||||
STUDIES+=("$num $sd $QHTCP_RESULTS_DIR/Exp$num")
|
||||
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
|
||||
|
||||
# Initialize missing Exp dirs
|
||||
STUDIES_DIRS=()
|
||||
for s in "${STUDIES_NUMS[@]}"; do
|
||||
study_dir="$QHTCP_RESULTS_DIR/Exp$s"
|
||||
STUDIES_DIRS+=("$study_dir")
|
||||
[[ -d $study_dir ]] || mkdir "$study_dir"
|
||||
|
||||
# We don't need a template anymore?
|
||||
# if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
|
||||
# err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
|
||||
# continue
|
||||
# fi
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r _ _ dir <<< "$study"
|
||||
[[ -d $dir ]] || mkdir "$dir"
|
||||
done
|
||||
|
||||
# # We don't need a template anymore?
|
||||
# # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
|
||||
# # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
|
||||
# # continue
|
||||
# # fi
|
||||
# done
|
||||
|
||||
((DEBUG)) && declare -p STUDIES
|
||||
|
||||
# Return true if at least one study was found
|
||||
[[ ${#STUDIES_NUMS[@]} -gt 0 ]]
|
||||
[[ ${#STUDIES[@]} -gt 0 ]]
|
||||
}
|
||||
|
||||
|
||||
@@ -2287,6 +2337,8 @@ main() {
|
||||
|
||||
parse_input "$@" # parse arguments with getopt
|
||||
|
||||
# ((DEBUG)) && declare -p
|
||||
|
||||
interactive_header "$@"
|
||||
|
||||
# # Prompt user for the PROJECT if we still don't have one
|
||||
@@ -2345,14 +2397,8 @@ main() {
|
||||
declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
|
||||
declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
|
||||
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
|
||||
if ((DEBUG)); then
|
||||
echo "Debug:"
|
||||
declare -p SCANS_DIR OUT_DIR TEMPLATES_DIR APPS_DIR \
|
||||
PROJECTS PROJECT_NAME \
|
||||
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
|
||||
STUDIES_ARCHIVE_FILE QHTCP_RESULTS_DIR QHTCP_TEMPLATE_DIR \
|
||||
STUDY_INFO_FILE EASY_RESULTS_DIR R_LIBS_USER
|
||||
fi
|
||||
|
||||
# ((DEBUG)) && declare -p
|
||||
|
||||
debug "Active modules: ${MODULES[*]}"
|
||||
debug "Active wrappers and their args: ${WRAPPERS[*]}"
|
||||
@@ -2365,21 +2411,19 @@ main() {
|
||||
done
|
||||
|
||||
# Run selected wrappers
|
||||
for i in "${!WRAPPERS[@]}"; do
|
||||
IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args
|
||||
if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then
|
||||
"${WRAPPERS[i]}" "${args[@]}" || return 1
|
||||
for wrapper in "${WRAPPERS[@]}"; do
|
||||
IFS=',' read -ra args <<< "$wrapper" # load the command args
|
||||
if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
|
||||
"${args[0]}" "${args[@]:1}" || return 1
|
||||
fi
|
||||
continue 2 # skip the command string
|
||||
done
|
||||
done
|
||||
|
||||
cat <<-EOF
|
||||
Successfully ran module(s): ${MODULES[*]}
|
||||
And wrapper(s): ${WRAPPERS[*]}
|
||||
On project(s): ${PROJECTS[*]}
|
||||
EOF
|
||||
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES
|
||||
[[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
|
||||
[[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
|
||||
[[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
|
||||
|
||||
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES SET_STUDIES YES
|
||||
}
|
||||
|
||||
# (Safe) main loop
|
||||
|
||||
Reference in New Issue
Block a user