Improve get_studies
This commit is contained in:
@@ -1280,60 +1280,9 @@ qhtcp() {
|
||||
done
|
||||
fi
|
||||
|
||||
# Sets STUDIES_NUMS and NUM_STUDIES (yes this makes sense)
|
||||
# Sets STUDIES_NUMS
|
||||
get_studies "$STUDY_INFO_FILE"
|
||||
|
||||
# Construct the next auto-entry
|
||||
# 1,ExpName1,NA,NA,UserInitials
|
||||
next_study_num=$(( NUM_STUDIES + 1 ))
|
||||
|
||||
# If the next Exp dir already exists don't use it
|
||||
while [[ -d $QHTCP_PROJECT_DIR/Exp$next_study_num ]]; do
|
||||
(( next_study_num++ ))
|
||||
done
|
||||
|
||||
# Use initials from project or whoami?
|
||||
# Best I can do is first two letters of username
|
||||
# See TODO in markdown
|
||||
initials="${USER:0:2}"
|
||||
INITIALS=${initials^^}
|
||||
next_study_entry="$next_study_num,$PROJECT_SUFFIX,NA,NA,$INITIALS"
|
||||
debug "$next_study_entry"
|
||||
|
||||
# Print current studies
|
||||
[[ -f $STUDY_INFO_FILE ]] &&
|
||||
echo "Current studies from $STUDY_INFO_FILE: " &&
|
||||
cat "$STUDY_INFO_FILE"
|
||||
|
||||
# Ask user to edit STUDY_INFO_FILE
|
||||
if ! ((YES)) && ask "Would you like to edit $STUDY_INFO_FILE to add or modify studies?"; then
|
||||
cat <<-EOF
|
||||
Give each experiment labels to be used for the plots and specific files.
|
||||
Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps
|
||||
|
||||
Auto-entry suggestion: $next_study_entry
|
||||
EOF
|
||||
if ask "Would you like to add (y) the auto-entry suggestion to $STUDY_INFO_FILE or edit STUDY_INFO_FILE in nano (n)?"; then
|
||||
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
|
||||
else
|
||||
debug "nano $STUDY_INFO_FILE"
|
||||
nano "$STUDY_INFO_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Initialize missing dirs
|
||||
STUDIES_DIRS=()
|
||||
for s in "${STUDIES_NUMS[@]}"; do
|
||||
STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s"
|
||||
STUDIES_DIRS+=("$STUDY_DIR")
|
||||
if ! [[ -d $STUDY_DIR ]]; then
|
||||
if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
|
||||
err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
done
|
||||
unset STUDY_DIR
|
||||
|
||||
# Replacing ExpFrontend.m
|
||||
choose_easy_results_dir
|
||||
@@ -1391,7 +1340,11 @@ remc() {
|
||||
"$1" # studyInfo file
|
||||
"${@:2}" \
|
||||
&& java_extract \
|
||||
"$QHTCP_PROJECT_DIR/out/" \
|
||||
"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" \
|
||||
"$APPS_DIR/java/ORF_List_Without_DAmPs.txt" \
|
||||
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv" \
|
||||
"$QHTCP_PROJECT_DIR" \
|
||||
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
|
||||
&& r_add_shift_values \
|
||||
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
|
||||
"$QHTCP_PROJECT_DIR/Shift_only.csv" \
|
||||
@@ -1399,7 +1352,7 @@ remc() {
|
||||
"$QHTCP_PROJECT_DIR/REMcWithShift.csv" \
|
||||
&& r_create_heat_maps \
|
||||
"$QHTCP_PROJECT_DIR/REMcWithShift.csv" \
|
||||
"$QHTCP_PROJECT_DIR/out" \
|
||||
"$QHTCP_PROJECT_DIR" \
|
||||
&& r_heat_maps_homology \
|
||||
"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \
|
||||
"$APPS_DIR/r/170503_DAmPs_Only.txt" \
|
||||
@@ -1483,7 +1436,7 @@ gta() {
|
||||
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
|
||||
zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results
|
||||
|
||||
# Sets STUDIES_NUMS and NUM_STUDIES
|
||||
# Sets STUDIES_NUMS
|
||||
get_studies "$STUDY_INFO_FILE"
|
||||
|
||||
[[ -d $gta_out_dir ]] || mkdir "$gta_out_dir"
|
||||
@@ -1631,7 +1584,7 @@ submodule r_gta_pairwiselk
|
||||
#
|
||||
# @arg $1 string First Exp# name
|
||||
# @arg $2 string Second Exp# name
|
||||
# @arg $3 string StudyInfo.txt file
|
||||
# @arg $3 string StudyInfo.csv file
|
||||
# @arg $4 string output directory
|
||||
#
|
||||
r_gta_pairwiselk() {
|
||||
@@ -1772,27 +1725,33 @@ submodule java_extract
|
||||
#
|
||||
# * Closed-source w/ hardcoded output directory, so have to pushd/popd to run (not ideal)
|
||||
#
|
||||
# @arg $1 string The output directory
|
||||
# @arg $1 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab
|
||||
# @arg $2 string ORF_List_Without_DAmPs.txt
|
||||
# @arg $3 string REMcRdy_lm_only.csv
|
||||
# @arg $4 string The output directory
|
||||
# @arg $5 string The output file
|
||||
# @exitcode 0 if expected output file exists
|
||||
# @exitcode 1 if expected output file does not exist
|
||||
|
||||
java_extract() {
  # Runs the javaExtract jar (ExecMain) from inside the output directory and
  # reports whether the expected output file exists afterwards.
  #
  #   $1  GeneByGOAttributeMatrix file   (default: copy under $APPS_DIR/java)
  #   $2  ORF list without DAmPs         (default: copy under $APPS_DIR/java)
  #   $3  REMcRdy_lm_only.csv input      (default: copy under $QHTCP_PROJECT_DIR)
  #   $4  directory to run the jar from  (default: $QHTCP_PROJECT_DIR)
  #   $5  expected output file           (default: REMcRdy_lm_only.csv-finalTable.csv
  #                                       under $QHTCP_PROJECT_DIR)
  #
  # Returns 0 if the expected output file exists after the run, 1 otherwise
  # (including when the backup is declined, the directory change fails, or the
  # java command itself fails).
  debug "Running: ${FUNCNAME[0]}"

  local gene_matrix="${1:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
  local orf_list="${2:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
  local input_csv="${3:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv"}"
  local out_dir="${4:-"$QHTCP_PROJECT_DIR"}"
  local out_file="${5:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv"}"
  local classpath="$APPS_DIR/java/javaExtract.jar"
  local -a java_cmd
  local java_rc

  # Back up the previous output; let the user decide whether a failed backup is fatal
  if ! backup "$out_file"; then
    ask "Backup of $out_file failed, continue?" || return 1
  fi

  java_cmd=(
    "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain
    "$input_csv"
    "$gene_matrix"
    "$orf_list"
    # Trailing positional arguments carried over unchanged from the original
    # invocation; their meaning is not documented here (closed-source jar).
    1 true true
  )

  debug "pushd && ${java_cmd[*]} && popd"

  # The jar writes relative to the current directory, so run it from out_dir.
  # Always popd, even when java fails, so the caller's working directory is restored.
  pushd "$out_dir" || return 1
  "${java_cmd[@]}"
  java_rc=$?
  popd || return 1
  (( java_rc == 0 )) || return 1

  if [[ ! -f $out_file ]]; then
    err "$out_file does not exist"
    return 1
  fi
}
|
||||
|
||||
|
||||
@@ -1964,18 +1923,94 @@ submodule get_studies
|
||||
# @exitcode 0 If one or more studies found
|
||||
# @exitcode 1 If no studies found
|
||||
# @set STUDIES_NUMS array Contains Exp numbers
|
||||
# @set NUM_STUDIES int Number of existing studies
|
||||
# @arg $1 string File to read
|
||||
#
|
||||
get_studies() {
  # Reads the study info CSV given as $1, optionally lets the user extend or
  # edit it first, and makes sure an Exp<N> directory exists for every study.
  #
  # Sets:
  #   STUDIES_NUMS  array of Exp numbers (first CSV column, header skipped)
  #   NUM_STUDIES   number of entries in STUDIES_NUMS
  #   STUDIES_DIRS  array of $QHTCP_PROJECT_DIR/Exp<N> paths
  #   INITIALS      upper-cased first two letters of the user name
  #
  # Returns 0 if one or more studies were found, 1 otherwise.
  debug "Running: ${FUNCNAME[0]}"

  local study_file="$1"
  local username initials response col1 s

  declare -ga STUDIES_NUMS=()
  NUM_STUDIES=0
  STUDIES_DIRS=()

  if [[ -z $study_file ]]; then
    err "No study info file given"
    return 1
  fi

  # Use initials from project or whoami?
  # Best we can do is the first two letters of the user name
  username="${USER:-$(id -un 2>/dev/null)}"
  initials="${username:0:2}"
  INITIALS="${initials^^}"

  # Find an Exp directory that does not exist yet.
  # Studies are numbered from 1, so start the search there (starting at 0
  # would always suggest 0 unless an Exp0 directory happened to exist).
  empty_study=1
  while [[ -d $QHTCP_PROJECT_DIR/Exp$empty_study ]]; do
    empty_study=$(( empty_study + 1 ))
  done
  next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS"

  # Create a default file with a header and the first auto-entry
  if [[ ! -f $study_file ]]; then
    echo "Creating default $study_file"
    if ! {
      echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy"
      echo "$next_study_entry"
    } > "$study_file"; then
      err "Could not create $study_file"
      return 1
    fi
    empty_study=$(( empty_study + 1 ))
    next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS"
  fi

  # Print current studies
  echo "Give each experiment labels to be used for the plots and specific files."
  echo "Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps"
  echo
  echo "Current StudyInfo file ($study_file) contents:"
  echo
  cat "$study_file"

  # Allow the user to add/edit the study info file (skipped when YES is set)
  if ! ((YES)); then
    while true; do
      echo "Auto-entry suggestion: $next_study_entry"
      echo "Would you like to (a)dd the auto-entry, (e)dit the StudyInfo.csv file in nano, or (c)ontinue?"
      # Treat end-of-input like "continue" so a closed stdin cannot loop forever
      read -r -p "Hit [Enter] or c to continue: " response || break
      case $response in
        ''|c)
          break
          ;;
        a)
          echo "Adding auto-entry suggestion to $study_file"
          echo "$next_study_entry" >> "$study_file"
          # Advance the suggestion so a second add does not duplicate the number
          empty_study=$(( empty_study + 1 ))
          next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS"
          ;;
        e)
          debug "nano $study_file"
          nano "$study_file"
          break
          ;;
        *)
          err "Invalid response, please try again"
          ;;
      esac
    done
  fi

  # Read study info file: split on comma and keep the first column.
  # The "|| [[ -n $col1 ]]" keeps a final line that lacks a trailing newline.
  while IFS=',' read -r col1 _ || [[ -n $col1 ]]; do
    col1="${col1//[[:space:]]/}" # drop stray blanks/CRs left by editors
    if [[ -n $col1 ]]; then
      STUDIES_NUMS+=("$col1")
    fi
  done < <(tail -n +2 "$study_file") # skip header

  NUM_STUDIES="${#STUDIES_NUMS[@]}"

  # Initialize missing dirs (the exp template copy is no longer performed)
  for s in "${STUDIES_NUMS[@]}"; do
    STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s"
    STUDIES_DIRS+=("$STUDY_DIR")
    if [[ ! -d $STUDY_DIR ]]; then
      mkdir -p "$STUDY_DIR" || err "Could not create $STUDY_DIR"
    fi
  done
  unset STUDY_DIR

  # Exit status: 0 if at least one study was found, 1 otherwise
  (( NUM_STUDIES > 0 ))
}
|
||||
|
||||
|
||||
@@ -2177,7 +2212,6 @@ main() {
|
||||
declare -gx STUDIES_ARCHIVE_FILE="$OUT_DIR/StudiesDataArchive.txt"
|
||||
declare -gx QHTCP_PROJECT_DIR="$OUT_DIR/$PROJECT_NAME"
|
||||
declare -gx QHTCP_TEMPLATE_DIR="$TEMPLATES_DIR/qhtcp"
|
||||
declare -gx STUDY_TEMPLATE_DIR="$TEMPLATES_DIR/exp"
|
||||
declare -gx STUDY_INFO_FILE="$QHTCP_PROJECT_DIR/StudyInfo.csv"
|
||||
declare -gx EASY_OUT_DIR="$QHTCP_PROJECT_DIR/easy"
|
||||
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
|
||||
@@ -2186,7 +2220,7 @@ main() {
|
||||
PROJECTS PROJECT_NAME \
|
||||
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
|
||||
STUDIES_ARCHIVE_FILE QHTCP_PROJECT_DIR QHTCP_TEMPLATE_DIR \
|
||||
STUDY_TEMPLATE_DIR STUDY_INFO_FILE
|
||||
STUDY_INFO_FILE
|
||||
fi
|
||||
|
||||
debug "Active modules: ${MODULES[*]}"
|
||||
|
||||
Reference in New Issue
Block a user