Rollup before removing NAs from joinInteractExps.R

This commit is contained in:
2024-08-15 15:02:53 -04:00
parent 6992d5eec0
commit 38b3f66695
3 changed files with 169 additions and 117 deletions

View File

@@ -139,7 +139,7 @@ print_help() {
# `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
# @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
# @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
# @option -w<value> | --wrapper=<value> Requires two arguments: the name of the wrapper and its arguments, can be passed multiple times
# @option -w<value> | --wrapper=<value> One or more wrappers and their arguments to run, can be passed multiple times or with a comma-separated string
# @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
# @option --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
@@ -163,20 +163,22 @@ parse_input() {
case $1 in
--project|-p)
shift
declare -ga PROJECTS
IFS=',' read -ra PROJECTS <<< "$1"
;;
--module|-m)
shift
declare -ga MODULES
IFS=',' read -ra MODULES <<< "$1"
;;
--wrapper|-w)
shift
IFS=',' read -ra WRAPPERS <<< "$1"
shift
declare -ga WRAPPERS
IFS=',' read -ra WRAPPERS <<< "$1"
;;
--nomodule|-n)
shift
declare -ga EXCLUDE_MODULES
IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
;;
--markdown)
@@ -336,6 +338,10 @@ execute() {
# @description Backup one or more files to an incremented .bk file
#
# **TODO**
#
# * Make backups hidden by prepending "."?
#
# @exitcode backup iterator max 255
# @internal
backup() {
@@ -343,8 +349,8 @@ backup() {
for f in "$@"; do
[[ -e $f ]] || continue
count=1
while [[ -f $f.bk.$count ]]; do
count=$((count++))
while [[ -e $f.bk.$count ]]; do
((count++))
done
echo "Backing up $f to $f.bk.$count"
debug "rsync -a $f $f.bk.$count"
@@ -525,7 +531,7 @@ interactive_header() {
echo ""
# Module selection
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
cat <<-EOF
${underline}Enter modules(s) to run${nounderline}
* <Enter> for all
@@ -554,7 +560,7 @@ interactive_header() {
# If we're just installing dependencies, skip the rest
[[ ${MODULES[*]} == "install_dependencies" ]] && return 0
# Submodule selection
# Wrapper selection
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
while :; do
cat <<-EOF
@@ -1335,7 +1341,7 @@ qhtcp() {
[[ -d $QHTCP_RESULTS_DIR ]] ||
err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
# Sets STUDIES_NUMS and STUDIES_DIRS
# Sets STUDIES
study_info
choose_easy_results "$EASY_OUT_DIR"
@@ -1348,22 +1354,24 @@ qhtcp() {
# # TODO Add them all to StudiesDataArchive?
# # Probably better to always add and remove dupes later since each invocation "counts"?
# for f in "${EASY_RESULTS_FILES[@]}"; do
# for s in "${STUDIES_NUMS[@]}"; do
# for study in "${STUDIES[@]}"; do
# read -r num sd dir <<< "$study"
# # Trying to match old ExpFrontend formatting
# printf "%s\t" \
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$s" \
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$num" \
# "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
# >> "$STUDIES_ARCHIVE_FILE"
# done
# done
# Run R interactions script on all studies
for s in "${STUDIES_NUMS[@]}"; do
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] ||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores"
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] ||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc"
r_interactions "$s"
for study in "${STUDIES[@]}"; do
read -r num sd dir <<< "$study"
[[ -d $dir/zscores ]] ||
execute mkdir "$dir/zscores"
[[ -d $dir/zscores/qc ]] ||
execute mkdir "$dir/zscores/qc"
r_interactions "$num" "$sd"
done \
&& remc \
&& gtf \
@@ -1384,13 +1392,12 @@ module remc
remc() {
debug "Running: ${FUNCNAME[0]}"
# Sets STUDIES_NUMS and STUDIES_DIRS
# Sets STUDIES
study_info
# If any wrapper fails, the rest will not run; this is fundamental to module design
# Remove leading && to run regardless
r_join_interactions \
"${STUDIES_DIRS[@]}" \
&& java_extract \
&& r_add_shift_values \
&& r_create_heat_maps \
@@ -1453,36 +1460,40 @@ module gta
gta() {
debug "Running: ${FUNCNAME[0]}"
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
# gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
# TODO This could be wrong, it could be in main results
# Sets STUDIES_NUMS and STUDIES_DIRS
# Sets STUDIES
study_info
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
execute mkdir "$GTA_OUT_DIR"
# Loop over the array and create pairwise arrays
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do
pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}")
for ((i=0; i<${#STUDIES[@]}; i++)); do
for ((j=i+1; j<${#STUDIES[@]}; j++)); do
read -r num1 _ _ <<< "${STUDIES[i]}"
read -r num2 _ _ <<< "${STUDIES[j]}"
pair=("$num1" "$num2")
echo "${pair[@]}"
done
done
# Create unique pairwise combinations of study nums from dir names
study_combos=()
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
for ((i=0; i<${#STUDIES[@]}; i++)); do
# Loop through the array again
for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do
for ((j=0; j<${#STUDIES[@]}; j++)); do
# If the indices are not the same
if [ "$i" != "$j" ]; then
# Print the unique combination
study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}")
read -r num1 _ _ <<< "${STUDIES[i]}"
read -r num2 _ _ <<< "${STUDIES[j]}"
study_combos+=("$num1,$num2")
fi
done
done
@@ -1490,11 +1501,12 @@ gta() {
# The following are three types of studies
# Individual studies
for s in "${STUDIES_NUMS[@]}"; do
zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file"
for study in "${STUDIES[@]}"; do
read -r num _ dir <<< "$study"
zscores_file="$dir/zscores/zscores_interaction.csv"
if [[ -f $zscores_file ]]; then
mkdir "$GTA_OUT_DIR/Exp$s"
r_gta "Exp$s" "$zscores_file"
mkdir "$GTA_OUT_DIR/Exp$num"
r_gta "Exp$num" "$zscores_file"
fi
done
@@ -1507,6 +1519,12 @@ gta() {
# All studies
# All preceding arguments are required so we can pass multiple studies
declare -a nums
for study in "${STUDIES[@]}"; do
read -r num _ _ <<< "$study"
nums+=("$num")
done
r_gta_heatmaps \
"$STUDY_INFO_FILE" \
"$gene_ontology_obo" \
@@ -1514,7 +1532,7 @@ gta() {
"$all_sgd_terms_csv" \
"$QHTCP_RESULTS_DIR" \
"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
"${STUDIES_NUMS[@]}"
"${nums[@]}"
}
@@ -1714,7 +1732,7 @@ wrapper r_interactions
# @arg $3 string study info file
# @arg $4 string SGD_features.tab
# @arg $5 string easy/results_std.txt
# @arg $6 string zscores directory
# @arg $6 string output directory
r_interactions() {
debug "Running: ${FUNCNAME[0]} $*"
cat <<-EOF
@@ -1729,7 +1747,11 @@ r_interactions() {
* Background values are reported in the results sheet and so could also be analyzed there.
EOF
script="$APPS_DIR/r/interactions.R"
declare script="$APPS_DIR/r/interactions.R"
declare out_dir="${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}"
[[ -d $out_dir ]] && backup "$out_dir"
mkdir "$out_dir"
execute "$RSCRIPT" "$script" \
"$1" \
@@ -1737,12 +1759,15 @@ r_interactions() {
"${3:-"$STUDY_INFO_FILE"}" \
"${4:-"$APPS_DIR/r/SGD_features.tab"}" \
"${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
"${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \
"$out_dir" \
"${@:7}" # future arguments
[[ -f "$out_dir/zscores_interaction.csv" ]] || (echo "$out_dir/zscores_interaction.csv does not exist"; return 1)
}
wrapper r_join_interactions
# shellcheck disable=SC2120
# @description joinInteractExps.R creates REMcRdy_lm_only.csv, Shift_only.csv, and parameters.csv
#
# TODO
@@ -1760,20 +1785,41 @@ wrapper r_join_interactions
# * Shift_only.csv
# * parameters.csv
#
# @arg $1 string output directory
# @arg $2 string sd value (default: 2)
# @arg $3 string study info file
# @arg $1 string output directory (required)
# @arg $2 string sd value (default: 2) (required)
# @arg $3 string study info file (required)
# @arg $4 array studies (required)
r_join_interactions() {
debug "Running: ${FUNCNAME[0]} $*"
script="$APPS_DIR/r/joinInteractExps.R"
declare script="$APPS_DIR/r/joinInteractExps.R"
declare -a dirs
declare -a out_files=(
"${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv"
"${1:-$QHTCP_RESULTS_DIR}/Shift_only.csv"
"${1:-$QHTCP_RESULTS_DIR}/parameters.csv"
)
((DEBUG)) && declare -p
backup "${out_files[@]}"
# If user provides study dirs, use those
if [[ $# -gt 3 ]]; then
dirs=("${@:4}")
else
study_info
for study in "${STUDIES[@]}"; do
read -r _ _ dir <<< "$study"
dirs+=("$dir")
done
fi
execute "$RSCRIPT" "$script" \
"${1:-$QHTCP_RESULTS_DIR}" \
"${2:-2}" \
"${3:-$STUDY_INFO_FILE}" \
"${@:4:-${STUDIES_DIRS[@]}}"
"${dirs[@]}"
local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv")
for f in "${out_files[@]}"; do
[[ -f $f ]] || (echo "$f does not exist"; return 1)
done
@@ -1816,6 +1862,9 @@ java_extract() {
"${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
1
true
true
)
debug "pushd && ${java_cmd[*]} && popd"
@@ -2021,8 +2070,7 @@ r_compile_gtf() {
#
# @exitcode 0 If one or more studies found
# @exitcode 1 If no studies found
# @set STUDIES_NUMS array contains Exp numbers
# @set STUDIES_DIRS array contains Exp directories
# @set STUDIES array Each element is a space-separated string: "Exp# sd ExpDir"
study_info() {
debug "Running: ${FUNCNAME[0]}"
@@ -2116,26 +2164,28 @@ study_info() {
fi
# Read study info file
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
STUDIES_NUMS+=("$col1")
declare -ga STUDIES
while IFS=',' read -r num _ sd _; do
STUDIES+=("$num $sd $QHTCP_RESULTS_DIR/Exp$num")
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
# Initialize missing Exp dirs
STUDIES_DIRS=()
for s in "${STUDIES_NUMS[@]}"; do
study_dir="$QHTCP_RESULTS_DIR/Exp$s"
STUDIES_DIRS+=("$study_dir")
[[ -d $study_dir ]] || mkdir "$study_dir"
# We don't need a template anymore?
# if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
# err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
# continue
# fi
for study in "${STUDIES[@]}"; do
read -r _ _ dir <<< "$study"
[[ -d $dir ]] || mkdir "$dir"
done
# # We don't need a template anymore?
# # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
# # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
# # continue
# # fi
# done
((DEBUG)) && declare -p STUDIES
# Return true if at least one study was found
[[ ${#STUDIES_NUMS[@]} -gt 0 ]]
[[ ${#STUDIES[@]} -gt 0 ]]
}
@@ -2287,6 +2337,8 @@ main() {
parse_input "$@" # parse arguments with getopt
# ((DEBUG)) && declare -p
interactive_header "$@"
# # Prompt user for the PROJECT if we still don't have one
@@ -2345,14 +2397,8 @@ main() {
declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
if ((DEBUG)); then
echo "Debug:"
declare -p SCANS_DIR OUT_DIR TEMPLATES_DIR APPS_DIR \
PROJECTS PROJECT_NAME \
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
STUDIES_ARCHIVE_FILE QHTCP_RESULTS_DIR QHTCP_TEMPLATE_DIR \
STUDY_INFO_FILE EASY_RESULTS_DIR R_LIBS_USER
fi
# ((DEBUG)) && declare -p
debug "Active modules: ${MODULES[*]}"
debug "Active wrappers and their args: ${WRAPPERS[*]}"
@@ -2365,21 +2411,19 @@ main() {
done
# Run selected wrappers
for i in "${!WRAPPERS[@]}"; do
IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args
if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then
"${WRAPPERS[i]}" "${args[@]}" || return 1
for wrapper in "${WRAPPERS[@]}"; do
IFS=',' read -ra args <<< "$wrapper" # load the command args
if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
"${args[0]}" "${args[@]:1}" || return 1
fi
continue 2 # skip the command string
done
done
cat <<-EOF
Successfully ran module(s): ${MODULES[*]}
And wrapper(s): ${WRAPPERS[*]}
On project(s): ${PROJECTS[*]}
EOF
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES
[[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
[[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
[[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES SET_STUDIES YES
}
# (Safe) main loop