diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index 8eb6edac..0a9a81e6 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -1280,60 +1280,9 @@ qhtcp() { done fi - # Sets STUDIES_NUMS and NUM_STUDIES (yes this makes sense) + # Sets STUDIES_NUMS get_studies "$STUDY_INFO_FILE" - # Construct the next auto-entry - # 1,ExpName1,NA,NA,UserInitials - next_study_num=$(( NUM_STUDIES + 1 )) - - # If the next Exp dir already exists don't use it - while [[ -d $QHTCP_PROJECT_DIR/Exp$next_study_num ]]; do - (( next_study_num++ )) - done - - # Use initials from project or whoami? - # Best I can do is first two letters of username - # See TODO in markdown - initials="${USER:0:2}" - INITIALS=${initials^^} - next_study_entry="$next_study_num,$PROJECT_SUFFIX,NA,NA,$INITIALS" - debug "$next_study_entry" - - # Print current studies - [[ -f $STUDY_INFO_FILE ]] && - echo "Current studies from $STUDY_INFO_FILE: " && - cat "$STUDY_INFO_FILE" - - # Ask user to edit STUDY_INFO_FILE - if ! ((YES)) && ask "Would you like to edit $STUDY_INFO_FILE to add or modify studies?"; then - cat <<-EOF - Give each experiment labels to be used for the plots and specific files. - Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps - - Auto-entry suggestion: $next_study_entry - EOF - if ask "Would you like to add (y) the auto-entry suggestion to $STUDY_INFO_FILE or edit STUDY_INFO_FILE in nano (n)?"; then - echo "$next_study_entry" >> "$STUDY_INFO_FILE" - else - debug "nano $STUDY_INFO_FILE" - nano "$STUDY_INFO_FILE" - fi - fi - - # Initialize missing dirs - STUDIES_DIRS=() - for s in "${STUDIES_NUMS[@]}"; do - STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s" - STUDIES_DIRS+=("$STUDY_DIR") - if ! [[ -d $STUDY_DIR ]]; then - if ! 
rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then - err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR" - continue - fi - fi - done - unset STUDY_DIR # Replacing ExpFrontend.m choose_easy_results_dir @@ -1391,7 +1340,11 @@ remc() { "$1" # studyInfo file "${@:2}" \ && java_extract \ - "$QHTCP_PROJECT_DIR/out/" \ + "$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" \ + "$APPS_DIR/java/ORF_List_Without_DAmPs.txt" \ + "$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv" \ + "$QHTCP_PROJECT_DIR" \ + "$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ && r_add_shift_values \ "$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ "$QHTCP_PROJECT_DIR/Shift_only.csv" \ @@ -1399,7 +1352,7 @@ remc() { "$QHTCP_PROJECT_DIR/REMcWithShift.csv" \ && r_create_heat_maps \ "$QHTCP_PROJECT_DIR/REMcWithShift.csv" \ - "$QHTCP_PROJECT_DIR/out" \ + "$QHTCP_PROJECT_DIR" \ && r_heat_maps_homology \ "$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv" \ "$APPS_DIR/r/170503_DAmPs_Only.txt" \ @@ -1483,7 +1436,7 @@ gta() { all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" zscores_file="${6:-"$gta_out_dir/zscores/zscores_interaction.csv"}" # TODO This could be wrong, it could be in main results - # Sets STUDIES_NUM and NUM_STUDIES + # Sets STUDIES_NUMS get_studies "$STUDY_INFO_FILE" [[ -d $gta_out_dir ]] || mkdir "$gta_out_dir" @@ -1631,7 +1584,7 @@ submodule r_gta_pairwiselk # # @arg $1 string First Exp# name # @arg $2 string Second Exp# name -# @arg $3 string StudyInfo.txt file +# @arg $3 string StudyInfo.csv file # @arg $4 string output directory # r_gta_pairwiselk() { @@ -1772,27 +1725,33 @@ submodule java_extract # # * Closed-source w/ hardcoded output directory, so have to pushd/popd to run (not ideal) # -# @arg $1 string The output directory +# @arg $1 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab +# @arg $2 string ORF_List_Without_DAmPs.txt +# @arg $3 string REMcRdy_lm_only.csv +# @arg $4 string The output directory +# 
@arg $5 string The output file +# @exitcode 0 if expected output file exists +# @exitcode 1 if expected output file does not exist + java_extract() { debug "Running: ${FUNCNAME[0]}" classpath="$APPS_DIR/java/javaExtract.jar" - # backup REMcRdy_lm_only.csv-finalTable.csv - if ! backup "$out_file"; then - ask "Backup of $out_file failed, continue?" || return 1 + # backup previous output + if ! backup "${5:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv"}"; then + ask "Backup of ${5:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv"} failed, continue?" || return 1 fi java_cmd=( "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8 -classpath "$classpath" ExecMain - "$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv" - "$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab" - "$APPS_DIR/java/ORF_List_Without_DAmPs.txt" 1 true true + "${3:-"$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv"}" + "${1:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}" + "${2:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}" ) debug "pushd && ${java_cmd[*]} && popd" - pushd "$1" && "${java_cmd[@]}" && popd || return 1 - out_file="$1/REMcRdy_lm_only.csv-finalTable.csv" - [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) + pushd "${4:-"$QHTCP_PROJECT_DIR"}" && "${java_cmd[@]}" && popd || return 1 + [[ -f ${5:-$QHTCP_PROJECT_DIR/REMcRdy_lm_only.csv-finalTable.csv} ]] } @@ -1964,18 +1923,94 @@ submodule get_studies # @exitcode 0 If one or more studies found # @exitcode 1 If no studies found # @set STUDIES_NUMS array Contains Exp numbers -# @set NUM_STUDIES int Number of existing studies # @arg $1 string File to read # get_studies() { debug "Running: ${FUNCNAME[0]}" declare -ga STUDIES_NUMS=() + # Use initials from project or whoami? 
+ # Best I can do is first two letters of username + # See TODO in markdown + initials="${USER:0:2}" + INITIALS=${initials^^} + empty_study=0 + + # Find an Exp directory that does not exist + while [[ -d $QHTCP_PROJECT_DIR/Exp$empty_study ]]; do + (( empty_study++ )) + done + + next_study_entry="$empty_study,$PROJECT_SUFFIX,NA,NA,$INITIALS" + + if [[ ! -f $1 ]]; then + echo "Creating default $1" + echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$1" + echo "$next_study_entry" >> "$1" + next_study_entry="$((empty_study+1)),$PROJECT_SUFFIX,NA,NA,$INITIALS" + fi + + # Print current studies + cat <<-EOF + Give each experiment labels to be used for the plots and specific files. + Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps + + Current StudyInfo file ($1) contents: + + $(cat "$1") + EOF + + # Allow user to add/edit the study info file + if ! ((YES)); then + for ((i=1; i<2; i++)); do + echo "Auto-entry suggestion: $next_study_entry" + echo "Would you like to (a)dd the auto-entry, (e)dit the StudyInfo.csv file in nano, or (c)ontinue?" 
+ read -r -p "Hit [Enter] or c to continue: " response + [[ -z $response ]] && break + case $response in + a) + echo "Adding auto-entry suggestion to $1" + echo "$next_study_entry" >> "$1" + next_study_entry="$(( ${next_study_entry%%,*} + 1 )),$PROJECT_SUFFIX,NA,NA,$INITIALS" + i=0 + ;; + e) + debug "nano $1" + nano "$1" + ;; + c) + break + ;; + *) + err "Invalid response, please try again" + i=0 + ;; + esac + continue + done + fi + + # Read study info file while IFS=',' read -r col1 _; do # split on comma, get second col STUDIES_NUMS+=("$col1") done < <(tail -n +2 "$1") # skip header - [[ ${#STUDIES_NUMS[@]} -gt 0 ]] && - NUM_STUDIES="${#STUDIES_NUMS{@}}" + [[ ${#STUDIES_NUMS[@]} -gt 0 ]] + + # Initialize missing dirs + STUDIES_DIRS=() + for s in "${STUDIES_NUMS[@]}"; do + STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s" + STUDIES_DIRS+=("$STUDY_DIR") + [[ -d $STUDY_DIR ]] || mkdir "$STUDY_DIR" + # # We don't need a template anymore? + # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then + # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR" + # continue + # fi + # fi + done + unset STUDY_DIR + } @@ -2177,7 +2212,6 @@ main() { declare -gx STUDIES_ARCHIVE_FILE="$OUT_DIR/StudiesDataArchive.txt" declare -gx QHTCP_PROJECT_DIR="$OUT_DIR/$PROJECT_NAME" declare -gx QHTCP_TEMPLATE_DIR="$TEMPLATES_DIR/qhtcp" - declare -gx STUDY_TEMPLATE_DIR="$TEMPLATES_DIR/exp" declare -gx STUDY_INFO_FILE="$QHTCP_PROJECT_DIR/StudyInfo.csv" declare -gx EASY_OUT_DIR="$QHTCP_PROJECT_DIR/easy" declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"} @@ -2186,7 +2220,7 @@ main() { PROJECTS PROJECT_NAME \ PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \ STUDIES_ARCHIVE_FILE QHTCP_PROJECT_DIR QHTCP_TEMPLATE_DIR \ - STUDY_TEMPLATE_DIR STUDY_INFO_FILE + STUDY_INFO_FILE fi debug "Active modules: ${MODULES[*]}"