Improve get_studies

This commit is contained in:
2024-08-04 19:31:23 -04:00
parent f3e2b73460
commit d6ff83e20b

View File

@@ -1280,60 +1280,9 @@ qhtcp() {
done
fi
# Sets STUDIES_NUMS and NUM_STUDIES (yes this makes sense)
# Sets STUDIES_NUMS
get_studies "$STUDY_INFO_FILE"
# Construct the next auto-entry
# 1,ExpName1,NA,NA,UserInitials
next_study_num=$(( NUM_STUDIES + 1 ))
# If the next Exp dir already exists don't use it
while [[ -d $QHTCP_PROJECT_DIR/Exp$next_study_num ]]; do
(( next_study_num++ ))
done
# Use initials from project or whoami?
# Best I can do is first two letters of username
# See TODO in markdown
initials="${USER:0:2}"
INITIALS=${initials^^}
next_study_entry="$next_study_num,$PROJECT_SUFFIX,NA,NA,$INITIALS"
debug "$next_study_entry"
# Print current studies
[[ -f $STUDY_INFO_FILE ]] &&
echo "Current studies from $STUDY_INFO_FILE: " &&
cat "$STUDY_INFO_FILE"
# Ask user to edit STUDY_INFO_FILE
if ! ((YES)) && ask "Would you like to edit $STUDY_INFO_FILE to add or modify studies?"; then
cat <<-EOF
Give each experiment labels to be used for the plots and specific files.
Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps
Auto-entry suggestion: $next_study_entry
EOF
if ask "Would you like to add (y) the auto-entry suggestion to $STUDY_INFO_FILE or edit STUDY_INFO_FILE in nano (n)?"; then
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
else
debug "nano $STUDY_INFO_FILE"
nano "$STUDY_INFO_FILE"
fi
fi
# Initialize missing dirs
STUDIES_DIRS=()
for s in "${STUDIES_NUMS[@]}"; do
STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s"
STUDIES_DIRS+=("$STUDY_DIR")
if ! [[ -d $STUDY_DIR ]]; then
if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
continue
fi
fi
done
unset STUDY_DIR
# Replacing ExpFrontend.m
choose_easy_results_dir
@@ -1391,7 +1340,11 @@ remc() {
"$1" # studyInfo file
"${@:2}" \
&& java_extract \
"$QHTCP_PROJECT_DIR/out/" \
"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" \
"$APPS_DIR/java/ORF_List_Without_DAmPs.txt" \
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv" \
"$QHTCP_PROJECT_DIR" \
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
&& r_add_shift_values \
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
"$QHTCP_PROJECT_DIR/Shift_only.csv" \
@@ -1399,7 +1352,7 @@ remc() {
"$QHTCP_PROJECT_DIR/REMcWithShift.csv" \
&& r_create_heat_maps \
"$QHTCP_PROJECT_DIR/REMcWithShift.csv" \
"$QHTCP_PROJECT_DIR/out" \
"$QHTCP_PROJECT_DIR" \
&& r_heat_maps_homology \
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
"$APPS_DIR/r/170503_DAmPs_Only.txt" \
@@ -1483,7 +1436,7 @@ gta() {
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results
# Sets STUDIES_NUM and NUM_STUDIES
# Sets STUDIES_NUMS
get_studies "$STUDY_INFO_FILE"
[[ -d $gta_out_dir ]] || mkdir "$gta_out_dir"
@@ -1631,7 +1584,7 @@ submodule r_gta_pairwiselk
#
# @arg $1 string First Exp# name
# @arg $2 string Second Exp# name
# @arg $3 string StudyInfo.txt file
# @arg $3 string StudyInfo.csv file
# @arg $4 string output directory
#
r_gta_pairwiselk() {
@@ -1772,27 +1725,33 @@ submodule java_extract
#
# * Closed-source w/ hardcoded output directory, so have to pushd/popd to run (not ideal)
#
# @arg $1 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab
# @arg $2 string ORF_List_Without_DAmPs.txt
# @arg $3 string REMcRdy_lm_only.csv
# @arg $4 string The output directory
# @arg $5 string The output file
# @exitcode 0 if expected output file exists
# @exitcode 1 if expected output file does not exist
java_extract() {
  debug "Running: ${FUNCNAME[0]}"

  # Every argument is optional and falls back to the project default.
  local classpath="$APPS_DIR/java/javaExtract.jar"
  local go_matrix="${1:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
  local orf_list="${2:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
  local input_csv="${3:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv"}"
  local out_dir="${4:-"$QHTCP_PROJECT_DIR"}"
  local out_file="${5:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv"}"
  local -a java_cmd
  local rc

  # backup previous output
  if ! backup "$out_file"; then
    ask "Backup of $out_file failed, continue?" || return 1
  fi

  # ExecMain takes the input csv first, then the GO matrix and the ORF list.
  # NOTE(review): the trailing "1 true true" arguments are retained from the
  # earlier invocation of this jar; confirm they are still required.
  java_cmd=(
    "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain
    "$input_csv"
    "$go_matrix"
    "$orf_list" 1 true true
  )

  debug "pushd && ${java_cmd[*]} && popd"

  # The jar writes into the current directory, so run it from the output
  # directory. Always popd, even when java fails, so the caller's working
  # directory is never left changed.
  pushd "$out_dir" || return 1
  "${java_cmd[@]}"
  rc=$?
  popd || return 1
  ((rc == 0)) || return 1

  if [[ ! -f $out_file ]]; then
    echo "$out_file does not exist" >&2
    return 1
  fi
}
@@ -1964,18 +1923,94 @@ submodule get_studies
# @exitcode 0 If one or more studies found
# @exitcode 1 If no studies found
# @set STUDIES_NUMS array Contains Exp numbers
# @set NUM_STUDIES int Number of existing studies
# @set STUDIES_DIRS array Contains the Exp directory of each study
# @arg $1 string File to read
#
get_studies() {
  debug "Running: ${FUNCNAME[0]}"
  declare -ga STUDIES_NUMS=()
  declare -ga STUDIES_DIRS=()
  NUM_STUDIES=0

  local info_file="${1:-}"
  local initials next_study_entry response col1 study_num study_dir

  if [[ -z $info_file ]]; then
    err "${FUNCNAME[0]}: no StudyInfo file given"
    return 1
  fi

  # Use initials from project or whoami?
  # Best I can do is first two letters of username
  # See TODO in markdown
  initials="${USER:0:2}"
  INITIALS=${initials^^}

  next_study_entry=$(_get_studies_suggest_entry "$info_file")

  # Seed a missing StudyInfo file with the header and one auto-entry
  if [[ ! -f $info_file ]]; then
    echo "Creating default $info_file"
    mkdir -p -- "$(dirname -- "$info_file")"
    if ! printf '%s\n' \
      "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" \
      "$next_study_entry" > "$info_file"; then
      err "Could not create $info_file"
      return 1
    fi
    next_study_entry=$(_get_studies_suggest_entry "$info_file")
  fi

  # Print current studies
  printf '%s\n' \
    "Give each experiment labels to be used for the plots and specific files." \
    "Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps" \
    "Current StudyInfo file ($info_file) contents:"
  cat -- "$info_file"

  # Allow user to add/edit the study info file
  if ! ((YES)); then
    while true; do
      echo "Auto-entry suggestion: $next_study_entry"
      echo "Would you like to (a)dd the auto-entry, (e)dit the StudyInfo.csv file in nano, or (c)ontinue?"
      # Stop prompting on EOF so a closed stdin cannot loop forever
      read -r -p "Hit [Enter] or c to continue: " response || break
      case $response in
        '' | c)
          break
          ;;
        a)
          echo "Adding auto-entry suggestion to $info_file"
          # Make sure the new row starts on its own line
          if [[ -s $info_file && -n $(tail -c 1 -- "$info_file") ]]; then
            echo >> "$info_file"
          fi
          echo "$next_study_entry" >> "$info_file"
          # Recompute so a second (a)dd cannot duplicate the Exp number
          next_study_entry=$(_get_studies_suggest_entry "$info_file")
          ;;
        e)
          debug "nano $info_file"
          nano "$info_file"
          break
          ;;
        *)
          err "Invalid response, please try again"
          ;;
      esac
    done
  fi

  # Read study info file: first column of every row after the header.
  # The "|| [[ -n ... ]]" keeps a final row that lacks a trailing newline.
  while IFS=',' read -r col1 _ || [[ -n $col1 ]]; do
    [[ -n $col1 ]] || continue # ignore blank rows
    STUDIES_NUMS+=("$col1")
  done < <(tail -n +2 -- "$info_file")
  NUM_STUDIES=${#STUDIES_NUMS[@]}

  # Initialize missing dirs
  for study_num in "${STUDIES_NUMS[@]}"; do
    study_dir="$QHTCP_PROJECT_DIR/Exp$study_num"
    STUDIES_DIRS+=("$study_dir")
    if [[ ! -d $study_dir ]]; then
      mkdir -p -- "$study_dir" || err "Could not create $study_dir"
    fi
  done

  # Exit status reports whether any study was found
  ((NUM_STUDIES > 0))
}

# Print a suggested StudyInfo row "<N>,$PROJECT_SUFFIX,NA,NA,$INITIALS" where
# <N> is the lowest number with no $QHTCP_PROJECT_DIR/Exp<N> directory and no
# row in the StudyInfo file ($1) starting with "<N>,".
# The search starts at 0. NOTE(review): confirm Exp0 is a valid study number.
_get_studies_suggest_entry() {
  local num=0
  while [[ -d $QHTCP_PROJECT_DIR/Exp$num ]] ||
    { [[ -f $1 ]] && grep -q -- "^${num}," "$1"; }; do
    num=$((num + 1))
  done
  printf '%s\n' "$num,$PROJECT_SUFFIX,NA,NA,$INITIALS"
}
@@ -2177,7 +2212,6 @@ main() {
declare -gx STUDIES_ARCHIVE_FILE="$OUT_DIR/StudiesDataArchive.txt"
declare -gx QHTCP_PROJECT_DIR="$OUT_DIR/$PROJECT_NAME"
declare -gx QHTCP_TEMPLATE_DIR="$TEMPLATES_DIR/qhtcp"
declare -gx STUDY_TEMPLATE_DIR="$TEMPLATES_DIR/exp"
declare -gx STUDY_INFO_FILE="$QHTCP_PROJECT_DIR/StudyInfo.csv"
declare -gx EASY_OUT_DIR="$QHTCP_PROJECT_DIR/easy"
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
@@ -2186,7 +2220,7 @@ main() {
PROJECTS PROJECT_NAME \
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
STUDIES_ARCHIVE_FILE QHTCP_PROJECT_DIR QHTCP_TEMPLATE_DIR \
STUDY_TEMPLATE_DIR STUDY_INFO_FILE
STUDY_INFO_FILE
fi
debug "Active modules: ${MODULES[*]}"