qhtcp-workflow 97 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362
  1. #!/usr/bin/env bash
  2. # Copyright 2024 Bryan C. Roessler
  3. #
  4. # Allow indirect functions
  5. # shellcheck disable=SC2317
  6. #
  7. # @name Hartman Lab QHTCP Workflow
  8. # @brief An opinionated yet flexible QHTCP analysis framework for the Hartman Lab.
  9. #
  10. # @description
  11. #
  12. # See the [User Input](#user-input) section for getting started.
  13. #
  14. # Insert a general description of Q-HTCP and the Q-HTCP process here.
  15. shopt -s extglob # Turn on extended globbing
  16. shopt -s nullglob # Allow null globs
  17. DEBUG=${DEBUG:-1} # Turn debugging ON by default during development
  18. # @description Use `--help` to print the help message.
  19. # @internal
print_help() {
  debug "Running: ${FUNCNAME[0]}"
  # Populates the depends_* arrays referenced in the DEPENDENCIES comment below
  install_dependencies --get-depends # Loads the dependency arrays
  # The heredoc expands PROJECT_PREFIX, ALL_MODULES and ALL_WRAPPERS —
  # these are assumed to be set globally before this function is called
cat <<-EOF
USAGE:
qhtcp-workflow [[OPTION] [VALUE]]...
--project, --module, --wrapper, and --nomodule can be passed multiple
times or as comma-separated strings (see EXAMPLES below)
OPTIONS:
--project, -p PROJECT[,PROJECT...]
PROJECT should follow the pattern ${PROJECT_PREFIX}_PROJECT
--module, -m MODULE[,MODULE...]
See MODULES section below for list of available modules
If no --include is specified, all modules are run
--wrapper, -w WRAPPER "[ARG1],[ARG2]..." (string of comma delimited arguments)
See WRAPPERS section below for list of available modules
See documentation for wrapper argument usage
--nomodule, -n MODULE[,MODULE...]
See MODULES and WRAPPERS section below for list of modules to exclude
--markdown
Generate the shdoc markdown README.md file for this program
--yes, -y, --auto
Always answer yes to questions (non-interactive mode)
--debug, -d
Print extra debugging info
--help, -h
Print this help message and exit
MODULES:
${ALL_MODULES[*]}
WRAPPERS:
${ALL_WRAPPERS[*]}
EXAMPLES:
qhtcp-workflow --module ${ALL_MODULES[0]} --module ${ALL_MODULES[1]}
qhtcp-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --module ${ALL_MODULES[0]} --module ${ALL_MODULES[1]}
qhtcp-workflow --project ${PROJECT_PREFIX}_MY_PROJECT --module ${ALL_MODULES[1]} --module ${ALL_MODULES[2]} --yes
qhtcp-workflow --module=${ALL_MODULES[0]},${ALL_MODULES[1]}
qhtcp-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT
qhtcp-workflow --project=${PROJECT_PREFIX}_MY_PROJECT,${PROJECT_PREFIX}_MY_OTHER_PROJECT --module=${ALL_MODULES[1]},${ALL_MODULES[2]} --yes --debug
qhtcp-workflow --project=${PROJECT_PREFIX}_MY_PROJECT --wrapper ${ALL_WRAPPERS[2]} \"/path/to/genefile.txt,/path/to/output/dir\" --wrapper ${ALL_WRAPPERS[3]} \"/path/to/sgofile\"
EOF
}
  61. # DEPENDENCIES:
  62. # deb: ${depends_deb[@]}
  63. # rpm: ${depends_rpm[@]}
  64. # brew: ${depends_brew[@]}
  65. # perl: ${depends_perl[@]}
  66. # R: ${depends_r[@]}
  67. # BiocManager: ${depends_bioc[@]}
  68. # @section Notes
  69. # @description
  70. #
  71. # ### TODO
  72. #
  73. # * Variable scoping is horrible right now
  74. # * I wrote this sequentially and tried to keep track the best I could
  75. # * Local vars have a higher likelihood of being lower case, global vars are UPPER
  76. # * See MODULE specific TODOs below
  77. #
  78. # ### General guidelines for writing external scripts
  79. #
  80. # * External scripts must be modular enough to handle input and output from multiple directories
  81. # * Don't cd in scripts (if you must, do it in a subshell!)
  82. # * Pass variables
  83. # * Pass options
  84. # * Pass arguments
  85. #
  86. # ## Project layout
  87. #
  88. # &nbsp;&nbsp;**qhtcp-workflow/**
  89. #
  90. # &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**scans/**
  91. #
  92. # * This directory contains raw image data and image analysis results for the entire collection of Q-HTCP experiments.
# * Subdirectories within "scans" should represent a single Q-HTCP study and be named using the following convention: yyyymmdd_username_experimentDescription
  94. # * Each subdirectory contains the Raw Image Folders for that study.
  95. # * Each Raw Image Folder contains a series of N folders with successive integer labels 1 to N, each folder containing the time series of images for a single cell array.
  96. # * It also contains a user-supplied subfolder, which must be named "MasterPlateFiles" and must contain two excel files, one named 'DrugMedia_experimentDescription' and the other named 'MasterPlate_experimentDescription'.
# * If the standard MasterPlate_Template file is being used, it's not necessary to customize the name.
  98. # * If the template is modified, it is recommended to rename it and describe accordingly - a useful convention is to use the same experimentDescription for the MP files as given to the experiment
# * The 'MasterPlate_' file contains associated cell array information (culture IDs for all of the cell arrays in the experiment) while the 'DrugMedia_' file contains information about the media that the cell array is printed to.
  100. # * Together they encapsulate and define the experimental design.
  101. # * The QHTCPImageFolders and 'MasterPlateFiles' folder are the inputs for image analysis with EASY software.
  102. # * As further described below, EASY will automatically generate a 'Results' directory (within the ExpJobs/'ExperimentJob' folder) with a name that consists of a system-generated timestamp and an optional short description provided by the user (Fig.2). The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY. Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder.
  103. #
  104. # &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**apps/easy/**
  105. #
  106. # * This directory contains the GUI-enabled MATLAB software to accomplish image analysis and growth curve fitting.
  107. # * EASY analyzes Q-HTCP image data within an 'ExperimentJob'' folder (described above; each cell array has its own folder containing its entire time series of images).
  108. # * EASY analysis produces image quantification data and growth curve fitting results for each cell array; these results are subsequently assembled into a single file and labeled, using information contained in the 'MasterPlate_' and 'DrugMedia_' files in the 'MasterPlateFiles' subdirectory.
  109. # * The final files (named '!!ResultsStd_.txt' or '!!ResultsELr_.txt') are produced in a subdirectory that EASY creates within the 'ExpJob#' folder, named '/ResultsTimeStampDesc/PrintResults' (Fig. 2).
  110. # * The /EASY directory is simply where the latest EASY version resides (additional versions in development or legacy versions may also be stored there).
  111. # * The raw data inputs and result outputs for EASY are kept in the 'ExpJobs' directory.
  112. # * EASY also outputs a '.mat' file that is stored in the 'matResults' folder and is named with the TimeStamp and user-provided name appended to the 'Results' folder name when 'New Experiment' is executed from the 'File' Dropdown menu in the EASY console.
  113. #
  114. # &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**apps/ezview/**
  115. #
  116. # * This directory contains the GUI-enabled MATLAB software to conveniently and efficiently mine the raw cell array image data for a Q-HTCP experiment.
  117. # * It takes the Results.m file (created by EASY software) as an input and permits the user to navigate through the raw image data and growth curve results for the experiment.
# * The /EZview provides a place for storing the latest EZview version (as well as other EZview versions).
  119. # * The /EZview provides a GUI for examining the EASY results as provided in the …/matResults/… .mat file.
  120. #
  121. # &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**Master Plates**
  122. #
  123. # * This optional folder is a convenient place to store copies of the 'MasterPlate_' and a 'DrugMedia_' file templates, along with previously used files that may have been modified and could be reused or further modified to enable future analyses.
  124. # * These two file types are required in the 'MasterPlateFiles' folder, which catalogs experimental information specific to individual Jobs in the ExpJobs folder, as described further below.
  125. #
  126. #
  127. #
  128. # @description
  129. # `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
  130. # @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
  131. # @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
  132. # @option -w<value> | --wrapper=<value> One or more wrappers and its arguments to run, can be passed multiple times or with a comma-separated string
  133. # @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
  134. # @option --markdown Generate the shdoc markdown file for this program
  135. # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
  136. # @option -d | --debug Turn on extra debugging output
  137. # @option -h | --help Print help message and exit (overrides other options)
  138. # @set PROJECTS array List of projects to cycle through
  139. # @set MODULES array List of modules to run on each project
  140. # @set WRAPPERS array List of wrappers and their arguments to run on each project
  141. # @set EXCLUDE_MODULES array List of modules not to run on each project
  142. # @set DEBUG int Turn debugging on
  143. # @set YES int Turn assume yes on
  144. parse_input() {
  145. debug "Running: ${FUNCNAME[0]} $*"
  146. long_opts="project:,module:,wrapper:,nomodule:,markdown,yes,auto,debug,help"
  147. short_opts="+p:m:w:n:ydh"
  148. if input=$(getopt -o $short_opts -l $long_opts -- "$@"); then
  149. eval set -- "$input"
  150. while true; do
  151. case $1 in
  152. --project|-p)
  153. shift
  154. declare -ga PROJECTS
  155. IFS=',' read -ra PROJECTS <<< "$1"
  156. ;;
  157. --module|-m)
  158. shift
  159. declare -ga MODULES
  160. IFS=',' read -ra MODULES <<< "$1"
  161. ;;
  162. --wrapper|-w)
  163. shift
  164. declare -ga WRAPPERS
  165. IFS=',' read -ra WRAPPERS <<< "$1"
  166. ;;
  167. --nomodule|-n)
  168. shift
  169. declare -ga EXCLUDE_MODULES
  170. IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
  171. ;;
  172. --markdown)
  173. generate_markdown; exit 0 # TODO disable the exit after development
  174. ;;
  175. --yes|-y|--auto)
  176. declare -g YES=1
  177. ;;
  178. --debug|-d)
  179. declare -g DEBUG=1
  180. ;;
  181. --help|-h)
  182. print_help
  183. exit 0
  184. ;;
  185. --)
  186. shift
  187. break
  188. ;;
  189. esac
  190. shift
  191. done
  192. else
  193. err "Incorrect options provided"; exit 1
  194. fi
  195. }
  196. # @section Modules
  197. # @description
  198. #
  199. # A module contains a cohesive set of tasks, including:
  200. #
  201. # * Filesystem operations
  202. # * Variable setting
  203. # * Executing other modules
  204. # * Executing wrappers
  205. #
  206. # Use a module to:
  207. #
  208. # * Build a new type of analysis from scratch
  209. # * Generate project directories
  210. # * Group multiple wrappers (and modules) into a larger task
  211. # * Dictate the ordering of multiple wrappers
  212. # * Call wrappers with the appropriate arguments
  213. #
  214. # @description
  215. module() {
  216. debug "Adding $1 module"
  217. ALL_MODULES+=("$1")
  218. declare -gA "$1"
  219. }
  220. # @description Ask the user a yes/no question
  221. # @arg $1 string The question to ask
  222. # @exitcode 0 If yes
  223. # @exitcode 1 If no
  224. # @internal
  225. ask() {
  226. declare response
  227. (( YES )) && return 0
  228. read -r -p "$* [y/N]: " response
  229. [[ ${response,,} =~ ^(yes|y)$ ]]
  230. }
  231. err() { echo "Error: $*" >&2; }
# @description Ask the user to input a name and store it in a caller-named global array
# Shows a suggested prefix and a generated example name; an empty response
# accepts the example. Invalid names get up to 3 consecutive retries.
# @arg $1 string formatted name (e.g. "project"), used in the prompts
# @arg $2 string variable name of the global array to populate
# @exitcode 0 If set
# @exitcode 1 If not
# @internal
ask_name() {
  debug "Running: ${FUNCNAME[0]} $*"
  declare -a to_add
  declare example_pn
  declare -g -a "$2"  # make sure the caller-named global array exists
  declare -n ref="$2" # nameref so we can assign to it by name below
  example_pn="${STUDY_PREFIX}_$(random_three_words)"
cat <<-EOF
${underline}Enter a new or existing $1${nounderline}
* Suggested prefix: ${STUDY_PREFIX}_
* You may choose any combination of words/characters following the prefix, but be sensible.
* Make it descriptive and avoid spaces and special characters.
* Example: $example_pn
EOF
  trys=3 # give the user up to 3 tries to enter a valid name
  for ((i=1; i<=trys; i++)); do
    # -e enables readline editing; -i pre-fills the suggested prefix
    read -r -e -p "Enter a new or existing $1: " -i "${STUDY_PREFIX}_" response
    if [[ -z $response ]]; then
      # Empty input: fall back to the generated example name
      to_add+=("$example_pn")
      break
    else
      if sanitize_pn "$response"; then
        to_add+=("$response")
        echo "$response successfully added as a $1"
        i=0 # resetting trys counter in case user wants to add more than 3 names
      else
        err "Invalid project name: $response"
        echo "Retrying ($i of $trys)"
      fi
    fi
  done
  # shellcheck disable=SC2034
  ref=("${to_add[@]}")
}
  272. # @description Sanitizer regex for prefix
  273. sanitize_pn() {
  274. [[ $1 =~ ^[0-9]{8}_.+_.+$ ]]
  275. }
  276. # @description handle debug output globally
  277. debug() { (( DEBUG )) && echo "Debug: $*"; }
  278. # @description Create a random three word string
  279. # Not super portable but nice to have
  280. random_three_words() {
  281. local -a arr
  282. adjectives=(
  283. "adorable" "adventurous" "agile" "amazing" "angry" "beautiful" "bold" "brave" "bright" "calm"
  284. "charming" "cheerful" "courageous" "creative" "delicate" "elegant" "energetic" "exciting" "fast" "friendly"
  285. "gentle" "happy" "healthy" "helpful" "honest" "humble" "intelligent" "jovial" "kind" "lively"
  286. "lovable" "magnificent" "mellow" "modest" "noble" "outgoing" "passionate" "peaceful" "powerful" "quick"
  287. "radiant" "reliable" "resourceful" "respectful" "shy" "smart" "strong" "sweet" "tender" "thoughtful"
  288. "timid" "unique" "upbeat" "vibrant" "warm" "wise" "wonderful" "youthful" "zealous" "eager"
  289. "friendly" "generous" "imaginative" "independent" "inspired" "joyful" "luminous" "mysterious" "playful" "serene"
  290. "spontaneous" "steady" "spirited" "stylish" "tough" "understanding" "vivid" "zany" "bold" "calm"
  291. "dynamic" "innovative" "proud" "reliable" "sincere" "strong" "talented" "trustworthy" "vivid" "zealous"
  292. )
  293. participles=(
  294. "abandoning" "absorbing" "accelerating" "achieving" "acquiring" "admiring" "advising" "agreeing"
  295. "allowing" "analyzing" "appearing" "applying" "arguing" "assembling" "assisting" "attracting"
  296. "believing" "browsing" "calculating" "calling" "caring" "celebrating" "cleaning" "climbing"
  297. "coaching" "collecting" "combining" "communicating" "competing" "confessing" "considering"
  298. "cooking" "correcting" "creating" "debating" "defining" "delivering" "designing" "discussing"
  299. "driving" "enjoying" "exploring" "feeling" "finishing" "fixing" "forming" "gathering" "growing"
  300. "guiding" "happening" "helping" "hoping" "improving" "increasing" "influencing" "involving"
  301. "learning" "leading" "looking" "managing" "measuring" "moving" "noticing" "observing" "offering"
  302. "organizing" "performing" "preparing" "presenting" "producing" "protecting" "questioning"
  303. "recommending" "recovering" "running" "saving" "searching" "seeing" "sharing" "solving"
  304. "starting" "studying" "succeeding" "supporting" "teaching" "thinking" "understanding" "using"
  305. "validating" "waiting" "working" "writing"
  306. )
  307. animals=(
  308. "antelope" "baboon" "badger" "bat" "bear" "beaver" "bison" "booby" "buffalo" "bull"
  309. "camel" "cat" "cheetah" "chicken" "chimpanzee" "clam" "cobra" "cougar" "cow" "crab"
  310. "crane" "crocodile" "crow" "deer" "dog" "dolphin" "dove" "duck" "eagle" "echidna"
  311. "eel" "elephant" "emu" "falcon" "ferret" "fish" "flamingo" "fox" "frog" "gazelle"
  312. "giraffe" "goat" "goose" "gorilla" "hare" "hawk" "hedgehog" "hippo" "horse" "hyena"
  313. "iguana" "impala" "jaguar" "kangaroo" "koala" "lion" "llama" "lobster" "lynx" "macaw"
  314. "manatee" "mole" "monkey" "moose" "mouse" "mule" "octopus" "okapi" "opossum" "ostrich"
  315. "otter" "owl" "panda" "panther" "parrot" "penguin" "pig" "platypus" "porcupine" "quail"
  316. "rabbit" "rat" "raven" "reindeer" "rhinoceros" "robin" "salmon" "seal" "shark" "sheep"
  317. "shrimp" "skunk" "sloth" "snail" "snake" "sparrow" "spider" "squid" "squirrel" "starling"
  318. "stingray" "swan" "tapir" "tiger" "toad" "toucan" "turtle" "vulture" "walrus" "wolverine"
  319. "wolf" "wombat" "zebra"
  320. )
  321. arr+=(
  322. "$(shuf -n1 -e "${adjectives[@]}")"
  323. "$(shuf -n1 -e "${participles[@]}")"
  324. "$(shuf -n1 -e "${animals[@]}")"
  325. )
  326. printf "%s_" "${arr[@]}" | sed 's/_$//'
  327. }
  328. # # @description Function to set a global array variable
  329. # # @arg $1 The name of the global array
  330. # # @arg $2 The values of the array
  331. # # @internal
  332. # set_global_array() {
  333. # debug "Running: ${FUNCNAME[0]} $*"
  334. # declare -n array="$1"
  335. # shift
  336. # # Declare the global array using the nameref
  337. # declare -g -a "${!arr}"
  338. # # Populate the array
  339. # arr=("$@")
  340. # }
# @description Print an array in columns
# Arrays longer than 8 entries are printed as two side-by-side numbered
# columns; shorter arrays get a single numbered column.
# @arg $1 name of the array to print (passed by name, resolved via nameref)
print_in_columns() {
  debug "Running: ${FUNCNAME[0]} $*"
  declare -n array="$1" # nameref to the caller's array
  declare num=${#array[@]}
  if [[ $num -gt 8 ]]; then
    # Calculate the number of elements in each column (first column gets the extra one)
    local num_columns=$(( (num + 1) / 2 ))
    # Determine the maximum width of the first column
    local max_width=0
    for ((i=0; i<num_columns; i++)); do
      # Check if the item exists
      if [[ -n "${array[i]}" ]]; then
        local item_length=${#array[i]}
        if [[ $item_length -gt $max_width ]]; then
          max_width=$item_length
        fi
      fi
    done
    # Print in two columns
    for ((i=0; i<num_columns; i++)); do
      # Print the first column, left-padded to the computed width
      if [[ -n "${array[i]}" ]]; then
        printf "%d. %-${max_width}s" $((i+1)) "${array[i]}"
      else
        printf "%${max_width}s" ""
      fi
      # Print the second column if it exists
      if [[ $((i + num_columns)) -lt $num ]]; then
        printf "\t%d. %s\n" $((i + num_columns + 1)) "${array[i + num_columns]}"
      else
        # Print a newline if no second column item
        echo
      fi
    done
  else
    # Print in a single column
    for ((i=0; i<num; i++)); do
      printf "%d. %s\n" $((i+1)) "${array[i]}"
    done
  fi
}
  384. # @description More concise debugging
  385. # @arg $1 array command(s) to run
  386. # @exitcode 0 command successful
  387. # @exitcode 1 command not successful
  388. # @internal
  389. execute() {
  390. if debug "$*"; then
  391. "$@"
  392. else
  393. "$@" &>/dev/null
  394. fi
  395. }
  396. is_directory_empty() {
  397. local dir="$1"
  398. # Check if the directory exists and is a directory
  399. if [ ! -d "$dir" ]; then
  400. echo "Directory does not exist or is not a directory"
  401. return 1
  402. fi
  403. # Iterate over the files and directories inside the specified directory
  404. for _ in "$dir"/*; do
  405. # If we find at least one entry, it's not empty
  406. return 1
  407. done
  408. # If the loop completes without finding any entries, the directory is empty
  409. return 0
  410. }
  411. # @description Backup one or more files to an incremented .bk file
  412. #
  413. # **TODO**
  414. #
  415. # * Make backups hidden by prepending "."?
  416. #
  417. # @exitcode backup iterator max 255
  418. # @internal
  419. backup() {
  420. debug "Running: ${FUNCNAME[0]} $*"
  421. for f in "$@"; do
  422. [[ -e $f ]] || continue
  423. declare count=1
  424. while [[ -e $f.bk.$count ]]; do
  425. ((count++))
  426. done
  427. execute mv "$f" "$f.bk.$count"
  428. done
  429. }
# @description Prints a helpful message at program start
# Shows the banner and available modules/wrappers/projects, then interactively
# collects module, wrapper, and project selections — each prompt is skipped
# when the corresponding selection was already supplied on the command line.
# Reads/writes globals: SCANS_DIR, OUT_DIR, MODULES, EXCLUDE_MODULES,
# WRAPPERS, PROJECTS, YES, underline, nounderline, projects.
# @internal
interactive_header() {
  debug "Running: ${FUNCNAME[0]}"
  # Banner plus the effective scans/output directories (overridable via env)
cat <<-EOF
_ _ _ _ _
| | | | | | | | | |
| |__| | __ _ _ __| |_ _ __ ___ __ _ _ __ | | __ _| |__
| __ |/ _ | __| __| _ _ \ / _ | _ \ | | / _ | _ \
| | | | (_| | | | |_| | | | | | (_| | | | | | |___| (_| | |_) |
|_| |_|\__,_|_| \__|_| |_| |_|\__,_|_| |_| |______\__,_|_.__/
___ _ _ _____ ____ ____
/ _ \| | | |_ _/ ___| _ \
| | | | |_| | | || | | |_) |
| |_| | _ | | || |___| __/
\__\_|_| |_| |_| \____|_|
Scans directory: $SCANS_DIR
Output directory: $OUT_DIR
Change the SCANS_DIR or OUT_DIR environment variable(s) to override
Example: SCANS_DIR=/path/to/scans OUT_DIR=/path/to/out ./qhtcp-workflow $*
EOF
  # Terminal underline on/off escape sequences for section headings
  underline=$(tput smul)
  nounderline=$(tput rmul)
  echo "${underline}Modules${nounderline}"
  print_in_columns "ALL_MODULES"
  echo ""
  # Print wrappers
  echo "${underline}Wrappers${nounderline}"
  print_in_columns "ALL_WRAPPERS"
  echo ""
  # Print projects from the scans directory (each subdirectory is a project)
  projects=("$SCANS_DIR"/*/)
  if [[ ${#projects[@]} -eq 0 ]]; then
    echo "No projects found in $SCANS_DIR"
    ask_name "project" "ADD_PROJECTS" && PROJECTS+=("${ADD_PROJECTS[@]}")
  else
    echo "${underline}Projects${nounderline}"
    projects=("${projects[@]%/}") # strip the trailing slash first!
    projects=("${projects[@]##*/}") # then reduce each path to its basename
    print_in_columns "projects"
  fi
  echo ""
  # Module selection (only when nothing was pre-selected on the command line)
  if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
cat <<-EOF
${underline}Enter modules(s) to run${nounderline}
* <Enter> for all
* A comma-separated list of module numbers: 2,5,12
* 0 for none (wrappers only)
EOF
    ((YES)) || read -r -p "(all): " response
    echo ""
    if [[ -z $response ]]; then
      MODULES=("${ALL_MODULES[@]}")
    elif [[ $response -eq 0 ]]; then
      # "0" means run no modules at all (wrappers only)
      EXCLUDE_MODULES=("${ALL_MODULES[@]}")
    else
      # Translate 1-based menu numbers into module names
      IFS=',' read -ra arr <<< "$response"
      for i in "${arr[@]}"; do
        if [[ $i =~ ^[0-9]+$ ]]; then
          MODULES+=("${ALL_MODULES[$((i-1))]}")
        else
          err "Module number $i is invalid, skipping"
        fi
      done
    fi
    unset response arr i
  fi
  # If we're just installing dependencies, skip the rest
  [[ ${MODULES[*]} == "install_dependencies" ]] && return 0
  # Wrapper selection
  # NOTE(review): this guard re-tests MODULES, which the block above usually
  # populates, so this prompt only appears when no modules were chosen —
  # confirm that is intended
  if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
    while :; do
cat <<-EOF
${underline}Enter wrapper(s) to run followed by its arguments in a comma-separated string${nounderline}
* <Enter> for none (break)
* A comma-separated list of wrappers and their arguments
* Quote the argument string if it contains whitespace
* Example: \""${ALL_WRAPPERS[0]},arg1,arg2,arg3...\"
EOF
      ((YES)) || read -r -p "(none): " response
      echo ""
      [[ -z $response ]] && break
      IFS=',' read -ra arr <<< "$response"
      WRAPPERS+=("${arr[@]}")
      unset response arr i
    done
  fi
  # Project selection
  if [[ ${#PROJECTS[@]} -eq 0 ]]; then
    num=${#projects[@]}
    if [[ $num -eq 0 ]]; then
      ask_name "project" "ADD_PROJECTS" && PROJECTS+=("${ADD_PROJECTS[@]}")
    else
      for ((c=1; c<2; c++)); do # safe loop that only runs once
cat <<-EOF
${underline}Enter project number(s) to analyze${nounderline}
* <Enter> for the latest project ($num)
* A comma-separated list of project numbers: 2,5,12
* 0 to add a new project
EOF
        ((YES)) || read -r -p "($num): " response
        echo
        if [[ $response == 0 ]]; then
          ask_name "project" "ADD_PROJECTS" && PROJECTS+=("${ADD_PROJECTS[@]}")
        else
          response="${response:-$num}" # default: the latest (highest-numbered) project
          IFS=',' read -ra arr <<< "$response"
          for i in "${arr[@]}"; do
            if [[ -n ${projects[$((i-1))]} ]]; then
              PROJECTS+=("${projects[$((i-1))]}")
            else
              err "Project number $i is invalid"
            fi
          done
        fi
        [[ ${#PROJECTS[@]} -eq 0 ]] && c=0 # nothing selected: repeat the loop
      done
    fi
    unset response arr i
  fi
  # Sanitize project names; prompt for a replacement on invalid ones
  for i in "${!PROJECTS[@]}"; do
    if ! sanitize_pn "${PROJECTS[i]}"; then
      echo "Project name ${PROJECTS[i]} is invalid"
      ask_name "project" "ADD_PROJECTS" && unset "PROJECTS[i]" && PROJECTS+=("${ADD_PROJECTS[@]}")
    fi
  done
}
  560. module install_dependencies
  561. # @description This module will automatically install the dependencies for running QHTCP.
  562. #
  563. install_dependencies() {
  564. debug "Running: ${FUNCNAME[0]} $*"
  565. # Dependency arrays
  566. depends_rpm=(
  567. graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano
  568. rsync coreutils libcurl-devel openssl-devel harfbuzz-devel
  569. fribidi-devel R-core R-core-devel java)
  570. depends_deb=(
  571. graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync
  572. coreutils libcurl-dev openssl-dev libharfbuzz-dev libfribidi-dev
  573. r-base r-base-dev default-jre)
  574. depends_brew=(
  575. graphiz pandoc gd pdftk-java shdoc nano perl rsync coreutils
  576. harfbuzz fribidi r java)
  577. depends_perl=(
  578. Test::Warnings Test::Fatal File::Map Sub::Uplevel ExtUtils::Config
  579. ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder)
  580. depends_r=(
  581. BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
  582. dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
  583. rmarkdown htmlwidgets gdata Hmisc future furrr)
  584. depends_bioc=(UCSC.utils org.Sc.sgd.db)
  585. [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars
  586. # Install system-wide dependencies
  587. echo "Installing system dependencies"
  588. echo "You may be prompted for your sudo password to install packages using your system package manager"
  589. echo "If you do not have sudo access, you may want to use toolbox"
  590. case "$(uname -s)" in
  591. Linux*|CYGWIN*|MINGW*)
  592. if command -v dnf &>/dev/null; then
  593. ask "Detected Linux RPM platform, continue?" || return 1
  594. sudo dnf install "${depends_rpm[@]}"
  595. elif command -v apt &>/dev/null; then
  596. ask "Detected Linux DEB platform, continue?" || return 1
  597. sudo apt install "${depends_deb[@]}"
  598. else
  599. echo "Sorry, your Linux platform is not supported for automatic dependency installation"
  600. echo "You will need to resolve dependencies manually"
  601. fi
  602. ;;
  603. Darwin*)
  604. ask "Detected Mac platform, continue?" || return 1
  605. export HOMEBREW_BREW_GIT_REMOTE="https://github.com/Homebrew/brew"
  606. curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh|bash
  607. brew install "${depends_brew[@]}"
  608. ;;
  609. *)
  610. echo "Your system could not be detected, please install dependencies manually"
  611. ;;
  612. esac
  613. # Install perl CPAN modules
  614. echo "Installing perl CPAN modules"
  615. echo "It is recommended to use the local::lib perl library if prompted"
  616. debug "cpan -I -i ${depends_perl[*]}"
  617. cpan -I -i "${depends_perl[@]}"
  618. # Install R packages
  619. echo "Installing R packages"
  620. # Make R library directory if it doesn't exist
  621. [[ -d "$R_LIBS_USER" ]] || execute mkdir -p "$R_LIBS_USER"
  622. depends_r_str=""
  623. depends_r_to_string() {
  624. for d in "${depends_r[@]}"; do
  625. depends_r_str+="$d\", \""
  626. done
  627. depends_r_str="${depends_r_str::-3}" # strip last , " (comma and quote)
  628. }
  629. depends_r_to_string
  630. # Install R packages
  631. for d in "${depends_r[@]}"; do
  632. debug "$RSCRIPT -e \"if (!require(\"$d\", quietly = TRUE)) {install.packages(\"$d\", dep=TRUE, lib=\"$R_LIBS_USER\", repos=\"https://cloud.r-project.org\")}\""
  633. "$RSCRIPT" -e "if (!require(\"$d\", quietly = TRUE)) {install.packages(\"$d\", dep=TRUE, lib=\"$R_LIBS_USER\", repos=\"https://cloud.r-project.org\")}"
  634. done
  635. # Install Bioc packages
  636. for d in "${depends_bioc[@]}"; do
  637. debug "$RSCRIPT -e \"BiocManager::install(\"$d\", lib=\"$R_LIBS_USER\")\""
  638. "$RSCRIPT" -e "BiocManager::install(\"$d\", lib=\"$R_LIBS_USER\")"
  639. done
  640. # Ask to make our custom R library the default
  641. if [[ $R_LIBS_USER == "$HOME/R/$SCRIPT_NAME" ]]; then
  642. line="export R_LIBS_USER=$R_LIBS_USER"
  643. if ! grep -qF "$line" "$HOME/.bashrc"; then
  644. echo "This script uses a local R library at $HOME/R/$SCRIPT_NAME"
  645. echo "You can install the R dependencies to this library using the install_dependencies module"
  646. fi
  647. if ((YES)) || ask "Would you like to make this R library the default for your user?"; then
  648. echo "Adding $line to your .bashrc"
  649. echo "If you use a different shell, update your R_LIBS_USER environment variable accordingly"
  650. echo "$line" >> ~/.bashrc
  651. fi
  652. else
  653. debug "R_LIBS_USER is set to a custom path"
  654. fi
  655. echo ""
  656. command -v "$MATLAB" &>/dev/null || echo "You will also need MATLAB installed for GUI modules"
  657. }
  658. module init_project
  659. # @description This function creates and initializes project directories
  660. #
  661. # This module:
  662. #
  663. # * Initializes a project directory in the scans directory
  664. #
  665. # **TODO**
  666. #
  667. # * Copy over source image directories from robot
  668. # * MasterPlate_ file **should not be an xlsx file**, no portability
  669. # * We can keep the existing xlsx code for old style fallback
  670. # * But moving forward should switch to csv or something open
  671. # * Do we need to sync a QHTCP template?
  672. #
  673. # **NOTES**
  674. #
  675. # * Copy over the images from the robot and then DO NOT TOUCH that directory except to copy from it
  676. # * Write-protect (read-only) if we need to
  677. # * Copy data from scans/images directory to the project working dir and then begin analysis
  678. # * You may think...but doesn't that 2x data?
  679. # * No, btrfs subvolume uses reflinks, only data that is altered will be duplicated
  680. # * Most of the data are static images that are not written to, so the data is deduplicated
  681. #
  682. init_project() {
  683. debug "Running: ${FUNCNAME[0]}"
  684. # Create a project scans dir
  685. [[ -d $PROJECT_SCANS_DIR ]] || { execute mkdir -p "$PROJECT_SCANS_DIR" || return 1; }
  686. # TODO eventually copy scans from robot but for now let's pause and wait for transfer
  687. echo "In the future we will copy scans from robot here"
  688. # read -r -p "Hit <Enter> to continue: "
  689. # Create the project results out directory
  690. [[ -d $PROJECT_RESULTS_DIR ]] || execute mkdir -p "$PROJECT_RESULTS_DIR"
  691. # Write skeleton files in csv
  692. # If we have to convert to xlsx later, so be it
  693. echo "TODO: in the future, offer to create the DrugMedia file here"
  694. # cat <<-EOF > "$DRUG_MEDIA_FILE"
  695. # EOF
  696. echo "TODO: in the future, offer to create the MasterPlate file here"
  697. # cat <<-EOF > "$MASTER_PLATE_FILE"
  698. # EOF
  699. }
  700. module easy
  701. # @description
  702. # Run the EASY matlab program
  703. #
  704. # INPUT
  705. #
  706. # * MasterPlate_.xls
  707. # * DrugMedia_.xls
  708. #
  709. # OUTPUT
  710. #
  711. # * out/PROJECT/easy/results_std.txt
  712. # * out/PROJECT/easy/results_elr.txt
  713. #
  714. # TODO
  715. #
  716. # * Don't create output in the scans folder, put it in an output directory
  717. # * The !!Results output files need standardized naming
  718. # * The input MasterPlate and DrugMedia sheets need to be converted to something standard like csv/tsv
  719. # * This would allow them to be created programmatically as well
  720. #
  721. # NOTES
  722. #
  723. # * I've modularized EASY to fit into this workflow but there may be things broken (especially in "stand-alone" mode)
  724. # * The scans/images and 'MasterPlateFiles' folder are the inputs for image analysis with EASY software.
  725. # * EASY will automatically generate a 'Results' directory (within the ExpJobs/'ExperimentJob' folder) w/ timestamp and an optional short description provided by the user (Fig.2).
  726. # * The 'Results' directory is created and entered, using the "File >> New Experiment" dropdown in EASY.
  727. # * Multiple 'Results' files may be created (and uniquely named) within an 'ExperimentJob' folder.
  728. #
  729. # INSTRUCTIONS
  730. #
  731. # * This program should handle the relevant directory and file creation and load the correct project into EASY
  732. #
  733. # #### Pin-tool mapping
  734. #
  735. # * Select at least two images from your experiment (or another experiment) to place in a 'PTmapFiles' folder.
  736. # * Sometimes an experiment doesn't have a complete set of quality spots for producing a pin tool map that will be used to start the spot search process.
  737. # * In this case the folder for Master Plate 1 (MP 1) is almost good but has a slight problem.
  738. # * At P13 the spot is extended. We might use this one but would like to include others that are more centered if possible.
  739. # * The other plates with higher drug concentrations could be used, but since a recent experiment has a better reference plate image, we will add it to the set of images to produce the pin tool map.
  740. #
  741. # ![Bad Pin Map](docs/imgs/easy/1-bad-pin-map.png "See the problem in Column 1, Row 13?")
  742. #
  743. # * We will find a good image from another experiment
  744. #
  745. # ![Good Pin Map](docs/imgs/easy/2-good-pin-map.png "Nice pin map")
  746. #
  747. # * We now have some images to generate a composite centered map for EASY to search and find the nucleation area of each spot as it forms.
  748. # * Click the Run menu tab.
  749. # * A drop down list of options is presented.
  750. # * Click the first item → [Plate Map Pintool ].
  751. #
  752. # ![Open PTmaps](docs/imgs/easy/3-open-ptmaps-dir.png "Where to find PTmaps")
  753. #
  754. # * Open the PTmapFiles folder.
  755. # * Then click on the .bmp files you wish to include to make the pin tool map.
  756. # * Click the Open button.
  757. #
  758. # ![Open BMPs](docs/imgs/easy/4-select-bmps.png "Open BMPs")
  759. #
  760. # * A warning dialog box may appear.
  761. # * This is nothing to be concerned about.
  762. # * Click OK and continue.
  763. #
  764. # ![Direct Map](docs/imgs/easy/5-direct-map.png "Map view, very pretty")
  765. #
# * 'Retry' takes you back so that you can select different .bmp files from which to create the map.
  767. # * In this case the spots from the two images are well aligned and give coverage to all the spots therefore we do not have to add new images.
  768. # * Remember, this map is just a good guess as to where to start looking for each spot not where it will focus to capture intensities.
  769. # * Click 'Open' again.
  770. #
  771. # ![Open BMPs](docs/imgs/easy/4-select-bmps.png "Open BMPs")
  772. #
  773. # * We can now shift these values to get a better 'hard' start for this search.
  774. # * Maybe we can move this search point to the left a bit by decreasing the 'Initial Col Position' slightly to 120 and clicking continue.
  775. #
  776. # ![Open BMPs](docs/imgs/easy/6-shift-values.png "Open BMPs")
  777. #
# * Even though the first result image using 126 may have given a good map, we will use the improved second initiation point by clicking the 'Continue/Finish' button.
  779. #
  780. # ![Shifted map](docs/imgs/easy/7-shifted-map.png "See the dots are redder and centered")
  781. #
  782. # * Note that the red “hot” spot is now well centered in each composite image spot.
  783. # * We can now click 'Continue / Finish' to proceed.
  784. # * The coordinates and parameters will be stored in the results folder 'PTmats'.
  785. # * This is where the .mat files which contain localization data for use in the next section of our work.
  786. # * The EASY GUI will come back.
  787. # * Now click the 'Run' → 'Image Curve ComboAnalysi'.
  788. # * This will perform the image quantification and then generate the curve fits for each plate selected.
  789. # * Typically we pick only one plate at a time for one or two master plates.
  790. # * The software will present the final image of the search if only 1 master plate is selected.
  791. # * If multiple plates are selected, no search images will be presented or stored as figures.
  792. # * However all the position data for every spot at every time point will be stored.
  793. # * This large data trove is used by EZview to produce the image click-on hot spot maps and the photo strips.
  794. #
  795. # ![Curve analysis](docs/imgs/easy/8-curve-analysis.png "Curve analysis")
  796. #
  797. # * Note the 'Select Files' dialog.
# * It allows the user to select the specific .bmp files to use.
  799. # * This can be useful if there are bad ones that need to be removed from the folder due to contamination.
  800. # * If all are good we can select them all and click 'Open' to run the process.
  801. # * There are other parameters that can be selected.
  802. # * For now we will continue and come back to those later.
  803. #
  804. # ![Select all BMPs](docs/imgs/easy/9-select-all-dialog.png "Select all BMPs")
  805. #
  806. # ![Contamination view](docs/imgs/easy/10-contamination-view.png "Contamination view")
  807. #
  808. # * The search focus of each spot at the end of the run is presented for examination
# * Notice that these have floated and locked in to a position determined on the fly, at a point where the initial growth has reached maturity.
  810. # * This prevents a jump to a late onset jump to a contamination site.
  811. # * If we found that we need to adjust our pin tool map or make other modifications, we can do that and rerun these single runs until we are satisfied.
  812. #
  813. # ![Search focus](docs/imgs/easy/11-search-focus.png "Search focus")
  814. #
  815. # * Next we will run the entire experiment by clicking on all the other master plates from the list box.
  816. #
  817. # ![Run experiment](docs/imgs/easy/12-run-experiment.png "Run experiment")
  818. #
  819. # * Depending on the number of master plates and the number of time point images taken for each, this next step can take a while.
  820. # * Click continue and do something else while the computer works.
  821. # * When the work is complete the EASY GUI will reappear without the master plate list.
  822. # * Now look in the /Results* /PrintResults folder to check that all the plates have run and produced data.
  823. #
  824. # ![Experiment complete](docs/imgs/easy/13-complete-experiment.png "Complete experiment")
  825. #
  826. # * This is a legacy print copy of data, but is still useful to check that all the quantification was completed successfully.
  827. #
  828. # ![Check results](docs/imgs/easy/14-check-results.png "Check results")
  829. #
  830. # #### Generate Reports
  831. #
  832. # * Generate a MPDM.mat file from the Excel master plate and drug media sheets that the user prepared as part of the experiment preparation.
  833. # * These sheets must conform to certain format rules.
  834. # * It is best when creating these to use a working copy as a template and replace the data with that of the current experiment.
  835. # * See Master Plate and Drug-Media Plate topic for details.
# * Click on the 'GenReports' menu tab and a drop down menu is presented; click the first item 'DrugMediaMP Generate .mat'.
  837. # * This will take you to the /MasterPlateFiles folder within the experiment currently being analyzed.
  838. # * Do as the dialog box instructs. Select the Master Plate Excel file first.
  839. # * Important note: These files (both for master plates and drug-medias) must be generated or converted to the Excel 95 version to be read in Linux.
  840. # * This can be done on either a Windows or an Apple machine running Excel.
  841. #
  842. #
  843. # ![Navigate to MasterPlateFiles](docs/imgs/easy/15-generate-mpdm-mat1.png "Navigate to MasterPlateFiles")
  844. #
  845. # ![Create a new MPDM.mat file](docs/imgs/easy/16-generate-mpdm-mat2.png "Create a new MPDM.mat file")
  846. #
  847. # ![Navigate to MasterPlateFiles](docs/imgs/easy/17-generate-mpdm-mat3.png "Navigate to MasterPlateFiles")
  848. #
  849. # ![Click OK](docs/imgs/easy/18-generate-mpdm-mat4.png "Click OK")
  850. #
  851. # * A message dialog pops up.
  852. # * Click 'OK'.
  853. #
  854. # ![Navigate to MasterPlateFiles](docs/imgs/easy/19-generate-mpdm-mat5.png "Navigate to MasterPlateFiles")
  855. #
  856. # * Next click on the 'GenReports' menu tab and the second item in the drop down list 'ResultsDB Generate'.
  857. #
  858. # ![Generate Reports](docs/imgs/easy/20-gen-reports.png "Generate Reports")
  859. #
  860. # * A dialog box with options appears.
  861. # * The default is 'Both'.
  862. # * 'Res' produces only a standard result sheet in the current experiments /Results*/PrintResults folder.
  863. # * 'DB' produces only a special file for convenient upload to databases.
  864. # * This file has no blank rows separating the plates and combines the raw data for each line item into a 'blob' as this is a convenient way to store data of variant lengths in a single database field.
  865. # * The concatenation of data for each row take a while. But is useful for uploading data.
  866. # * Typically 'Both' is the preferred option, however, if one needs to review the results quickly, this provides that option.
  867. #
  868. # * We can open the !!Results MI 16_0919 yor1-1 copy.txt text file using Libre Open Office to review the results.
  869. #
  870. # ![Results file](docs/imgs/easy/21-results-file.png "Results file")
  871. #
  872. # * We can do likewise with the !!Dbase_MI 16_0919_yor1-2 copy.txt text file.
  873. #
  874. # ![Db file](docs/imgs/easy/22-dbase-file.png "Db file")
  875. #
  876. # * Note that there are no headers or empty rows.
  877. # * Since Libre may corrupt the text files, it could be advisable to only read them and refrain from any 'Save' options presented.
  878. #
  879. # #### Master Plate and Drug Media Spreadsheets
  880. #
  881. # * The Master Plate and Drug- Media Spreadsheets correlate to the collected and calculated data with the defining definitions of the cultures, drugs and media involved in producing the experimental data.
  882. # * These spreadsheets have a very specific format which was instigated at the beginning of our work.
  883. # * To maintain compatibility over the years, we maintain that format.
  884. # * To begin with, our system can be used with Windows, Linux and Apple operating systems.
  885. # * To accommodate these OS's, the Excel version must be an older Excel 95 version which is cross compatible for Matlab versions within all three major OS's.
  886. # * Windows is more tolerant, but to avoid problems producing results reports, ALWAYS use the Excel 95 format for your spreadsheets.
  887. # * Do not remove any header rows. They can be modified with exception of the triple hash (###).
  888. # * Do not change the number or order of the columns.
  889. # * Next place a 'space' in unused empty spreadsheet entry positions.
  890. # * This can cause problems in general for some software utilities.
  891. # * It is just best to make this a standard practice.
  892. # * Avoid using special characters.
  893. # * Depending on the OS and software utility (especially database utilities), these can be problematic.
  894. # * Certain 'date' associated entries such as Oct1 or OCT1 will be interpreted by Excel as a date and automatically formatted as such.
  895. # * Do not use Oct1 (which is a yeast gene name) instead use Oct1_ or it's ORF name instead.
  896. # * When creating a Master Plate spreadsheet, it is best to start with a working spreadsheet template and adjust it to your descriptive data.
  897. # * Be sure that ### mark is always present in the first column of the header for each plate.
# * This is an important convention, as it is used to define a new plate set of entry data.
  899. # * Each plate is expected to have 384 rows of data correlated with the 384 wells of the source master plates.
  900. # * These have a particular order going through all 24 columns each row before proceeding to the next row.
  901. # * Gene names and ORF name entries should be as short as possible (4-5 character max if possible) as these are used repeatedly as part of concatenated descriptors.
  902. # * The 'Replicate' field and the 'Specifics' fields can be used for additional information.
  903. # * The 'Replicate' field was originally designed to allow the user to sort replicates but it can be used for other relevant information.
  904. # * The 'Specifics' field was created to handle special cases where the liquid media in which cultures were grown on a single source plate was selectively varied.
  905. # * This gives the researcher a way to sort by growth media as well as gene or ORF name.
  906. # * It can also be used to sort other properties instead of modifying the gene name field.
  907. # * Thoughtful experiment design and layout are important for the successful analysis of the resultant data.
  908. # * It is typically a good idea to create at least one reference full plate and make that plate the first source master plate.
  909. # * Typically we give those reference cultures the 'Gene Name' RF1.
  910. # * Traditionally we also made a second full reference plate with its cultures labeled RF2.
  911. # * More recently some researchers have gone to dispersing RF1 control reference cultures throughout the source master plates series in addition to the first full source master plate.
  912. # * The EZview software has been updated accordingly to find these references and perform associated calculations.
  913. #
  914. # ![Master Plate file](docs/imgs/easy/23-mp-file.png "Master Plate file")
  915. #
  916. # * There are a number of fields on the spreadsheet which in this case were left empty.
  917. # * This spreadsheet format was created originally with studies of whole yeast genome SGA modifications incorporated.
  918. # * Therefore all fields may not be relevant.
  919. # * However, when ever relevant it is strongly advised to fill in all the appropriate data.
  920. # * The Drug-Media spreadsheet defines the perturbation components of each type of agar plate that the source master plates are printed on.
  921. # * Again the format adherence is essential.
  922. # * There is a '1' in the first column- second row (A2).
  923. # * This has as legacy going back to early use.
  924. # * It is still necessary and should not be deleted.
  925. # * The header row must not be deleted.
  926. # * A triple hash(###) must be placed in the cell below the last entry in the Drug field (Column 2).
  927. # * Again insert a 'space' in each unused or empty cell in each field.
  928. # * Again avoid special characters which may cause problems if not in the experiment quantification in subsequent analysis utilities.
  929. # * A utility looking for a text field may end up reading a null and respond inappropriately.
  930. # * As with the master plate Excel sheet, it is a good idea to use a working copy of an existing Drug-Media spreadsheet and adapt it to ones needs.
  931. #
  932. # ![Drug Media file](docs/imgs/easy/24-dm-file.png "Drug Media file")
  933. #
  934. #
  935. #
  936. # To analyze a new Q-HTCP experiment:
  937. #
  938. # * Open the EASY Software.
  939. # * Open 'EstartConsole.m' with MATLAB
  940. # * Click the Run icon (play button)
  941. # * When prompted, click "Change Folder" (do not select "Add to Path").
  942. # * In the pop-up display, select from the 'File' dropdown: 'New Experiment'.
  943. # * From the pop-up, choose where to save the new file.
  944. # * Navigate to the relevant job in the ExpJobs folder, name the file accordingly, and click 'save'.
  945. # * The newly created .mat file in the newly created Results folder will automatically be loaded.
  946. # * The file name will then be automatically appended by the code with the current date information (e.g. 'A1.mat' will become 'Results2023-07-19A1)
  947. # * If the experiment has already been created, it can be reloaded by clicking 'Load Experiment' instead of 'New Experiment' and selecting the relevant results
  948. # * In the pop-up display, click on the 'Run' dropdown menu and select 'Image CurveFit ComboAnalysis'.
  949. # * In the updated pop-up, choose/highlight all desired image folders for analysis (this is generally all of the folders, since only the ones that need analysis should be there) and then click on 'continue'.
  950. # * As the program is running, updates will periodically appear in the Command Window; there will be an initial pause at "Before call to NIscanIntens…..".
  951. # * When the curve fitting is finished, the EASY console will pop back up.
  952. # * Check to see the completed analysis results in the newly created 'PrintResults' Folder, inside of the 'Results' Folder.
  953. # * Other folders ('CFfigs', 'figs', 'Fotos') are created for later optional use and will be empty.
  954. # * **NOTE:** The image analysis is completed independent of labeling the data (strains, media type, etc. Labeling happens next with the 'GenReports' function).
  955. # * Click on the 'GenReports' dropdown and select 'DrugMediaMP Generate .mat'
  956. # * **NOTE:** The 'MasterPlate' and 'DrugMedia' files have very specific formats and should be completed from a template.
  957. # * The Masterplate file must be exact (it must contain all and only the strains that were actually tested).
  958. # * For example, if only part of a library is tested, the complete library file must be modified to remove irrelevant strains.
  959. # * You will be prompted to first select the 'MasterPlate' file. You will need to navigate away from the working directory to get to it.
  960. # * It is fine for the 'MasterPlate_' file to be .xlsx (or .xls), and if you don't see it in the popup window, then change the file type from '.xls' to "all files" and then select it.
  961. # * Once it is selected, a report of the number of master plates in the file will pop up; when the report appears, assuming it is correct, click on 'OK'.
  962. # * You will then be prompted to select the 'DrugMedia' file from the relevant job folder. You will automatically return to the correct prior directory location.
  963. # * Choose it and click 'OK'. You may see a warning about column headers being modified, but that's ok.
  964. # * This will create an additional file in the 'MasterPlatesFiles' folder named 'MPDMmat.mat'
  965. # * Click on the 'GenReports' dropdown and select 'Results_Generate.'
  966. # * You will first see '!!ResultsElr_.txt' generated in the 'PrintResults' folder.
  967. # * Refreshing will reveal an increasing file size until you see the '!!ResultsStd_.txt' being generated.
  968. # * When finished, the '!!ResultsStd_.txt' will be about the same file size and it should be used in the following StudiesQHTCP analysis.
  969. # * 'NoGrowth_.txt', and 'GrowthOnly_.txt' files will be generated in the 'PrintResults' folder.
  970. #
  971. #
  972. #
  973. #
  974. # Issues:
  975. # * We need full documentation for all of the current workflow. There are different documents that need to be integrated. This will need to be updated as we make improvements to the system.
# * MasterPlate_ file must have ydl227c in orf column, or else Z_interactions.R will fail, because it can't calculate shift values.
  977. # * Make sure there are no special characters; e.g., (), “, ', ?, etc.; dash and underscore are ok as delimiters
  978. # * Drug_Media_ file must have letter character to be read as 'text'.
  979. # * MasterPlate_ file and DrugMedia_ are .xlsx or .xls, but !!Results_ is .txt.
  980. # * In Z_interactions.R, does it require a zero concentration/perturbation (should we use zero for the low conc, even if it's not zero), e.g., in order to do the shift correctly.
  981. # * Need to enable all file types (not only .xls) as the default for GenerateResults (to select MP and DM files as .xlsx).
  982. # * Explore differences between the ELR and STD files - 24_0414; John R modified Z script to format ELR file for Z_interactions.R analysis.
  983. # * To keep time stamps when transferring with FileZilla, go to the transfer drop down and turn it on, see https://filezillapro.com/docs/v3/advanced/preserve-timestamps/
  984. # * Could we change the 'MasterPlateFiles' folder label in EASY to 'MasterPlate_DrugMedia' (since there should be only one MP and there is also a DM file required?
  985. # * I was also thinking of adding a 'MasterPlateFilesOnly' folder to the QHTCP directory template where one could house different MPFiles (e.g., with and without damps, with and without Refs on all MPs, etc; other custom MPFiles, updated versions, etc)
  986. # * Currently updated files are in '23_1011_NewUpdatedMasterPlate_Files' on Mac (yeast strains/23_0914…/)
  987. # * For EASY to report cell array positions (plate_row_column) to facilitate analyzing plate artifacts. The MP File in Col 3 is called 'LibraryLocation' and is reported after 'Specifics' in the !!Results.
  988. # * Can EASY/StudiesQ-HTCP be updated at any time by rerunning with updated MP file (new information for gene, desc, etc)- or maybe better to always start with a new template?
  989. # * Need to be aware of file formatting to avoid dates (e.g., with gene names like MAY24, OCT1, etc, and with plate locations 1E1, 1E2, etc)- this has been less of a problem.
  990. # * In StudiesQHTCP folders, remember to annotate Exp1, Exp2, in the StudyInfo.csv file.
  991. # * Where are gene names called from for labeling REMc heatmaps, TSHeatmaps, Z-interaction graphs, etc? Is this file in the QHTCP 'code' folder, or is it in the the results file (and thus ultimately the MP file)?
  992. # * Is it ok for a MasterPlate_ file to have multiple sheets (e.g., readme tab- is only the first tab read in)?
  993. # * What are the rules for pulling information from the MasterPlateFile to the !!Results_ (e.g., is it the column or the Header Name, etc that is searched? Particular cells in the DrugMedia file?).
  994. # * Modifier, Conc are from DM sheet, and refer to the agar media arrays. OrfRep is from MasterPlate_ File. 'Specifics' (Last Column) is experiment specific and accommodate designs involving differences across the multi-well liquid arrays. 'StrainBkGrd' (now 'Library location') is in the 3rd column and reported after 'Specifics' at the last col of the '!!Results..' file.
  995. # * Do we have / could we make an indicator- work in progress or idle/complete with MP/DM and after gen-report. Now, we can check for the MPDMmat.mat file, or we can look in PrintResults, but would be nice to know without looking there.
  996. # * File>>Load Experiment wasn't working (no popup to redirect). Check this again.
# @description Prepare and (optionally) launch the EASY MATLAB image-analysis
# pipeline for a Q-HTCP experiment: print the step-by-step GUI instructions,
# set up the EASY results directory (with an optional user-supplied suffix),
# copy template assets, and start EASYconsole.m in MATLAB.
#
# Globals (read): APPS_DIR, EASY_RESULTS_DIR, SCANS_DIR, PROJECT, YES, MATLAB
# Globals (set/exported): EASY_DIR, EASY_PROJECT_NAME, DRUG_MEDIA_FILE, MASTER_PLATE_FILE
easy() {
debug "Running: ${FUNCNAME[0]}"
# Operator instructions for the EASY GUI; <<- strips leading tabs only
# (the content is flush-left, so it is emitted verbatim)
cat <<-EOF
To analyze a new Q-HTCP experiment:
* Open the EASY Software.
* Open 'EASYconsole.m' with MATLAB
* Click the Run icon (play button)
* When prompted, click "Change Folder" (do not select "Add to Path").
* In the pop-up display, select from the 'File' dropdown: 'New Experiment'.
* From the pop-up, choose where to save the new file.
* Navigate to the relevant job in the ExpJobs folder, name the file accordingly, and click 'save'.
* The newly created .mat file in the newly created Results folder will automatically be loaded.
* The file name will then be automatically appended by the code with the current date information (e.g. 'A1.mat' will become 'Results2023-07-19A1)
* If the experiment has already been created, it can be reloaded by clicking 'Load Experiment' instead of 'New Experiment' and selecting the relevant results
* Next, in the pop-up display, click on the 'Run' dropdown menu and select 'Image CurveFit ComboAnalysis'.
* In the updated pop-up, choose/highlight all desired image folders for analysis (this is generally all of the folders, since only the ones that need analysis should be there) and then click on 'continue'.
* As the program is running, updates will periodically appear in the Command Window; there will be an initial pause at "Before call to NIscanIntens…..".
* When the curve fitting is finished, the EASY console will pop back up.
* Check to see the completed analysis results in the newly created 'PrintResults' Folder, inside of the 'Results' Folder.
* Other folders ('CFfigs', 'figs', 'Fotos') are created for later optional use and will be empty.
* NOTE: The image analysis is completed independent of labeling the data (strains, media type, etc. Labeling happens next with the 'GenReports' function).
* Next, click on the 'GenReports' dropdown and select 'DrugMediaMP Generate .mat'
* NOTE: The 'MasterPlate' and 'DrugMedia' files have very specific formats and should be completed from a template.
* The Masterplate file must be exact (it must contain all and only the strains that were actually tested).
* For example, if only part of a library is tested, the complete library file must be modified to remove irrelevant strains.
* You will be prompted to first select the 'MasterPlate' file. You will need to navigate away from the working directory to get to it.
* It is fine for the 'MasterPlate_' file to be .xlsx (or .xls), and if you don't see it in the popup window, then change the file type from '.xls' to "all files" and then select it.
* Once it is selected, a report of the number of master plates in the file will pop up; when the report appears, assuming it is correct, click on 'OK'.
* You will then be prompted to select the 'DrugMedia' file from the relevant job folder. You will automatically return to the correct prior directory location.
* Choose it and click 'OK'. You may see a warning about column headers being modified, but that's ok.
* This will create an additional file in the 'MasterPlatesFiles' folder named 'MPDMmat.mat'
* Click on the 'GenReports' dropdown and select 'Results_Generate.'
* You will first see '!!ResultsElr_.txt' generated in the 'PrintResults' folder.
* Refreshing will reveal an increasing file size until you see the '!!ResultsStd_.txt' being generated.
* When finished, the '!!ResultsStd_.txt' will be about the same file size and it should be used in the following StudiesQHTCP analysis.
'NoGrowth_.txt', and 'GrowthOnly_.txt' files will be generated in the 'PrintResults' folder.
EOF
declare -gx EASY_DIR="$APPS_DIR/matlab/easy"
# script is assigned without 'local' — it is visible to the rest of the shell
script="$EASY_DIR/EASYconsole.m"
# Prompt user for suffix (skipped in auto/YES mode)
# NOTE(review): in YES mode EASY_SUFFIX may be unset here — assumes 'set -u'
# is not active or EASY_SUFFIX is initialized elsewhere; confirm
echo "Current EASY results directory: $EASY_RESULTS_DIR"
((YES)) || read -r -p "Enter a custom suffix and/or hit enter to use the default directory (no suffix): " EASY_SUFFIX
[[ -n $EASY_SUFFIX ]] && EASY_RESULTS_DIR+="_$EASY_SUFFIX"
# This dirname is separate from the project's so multiple EASY results can be generated
declare -gx EASY_PROJECT_NAME="${EASY_RESULTS_DIR##*/}"
debug "EASY results project name: $EASY_PROJECT_NAME"
# Backup and create EASY results dirs
# (if backup moves the dir aside, the second test recreates it fresh)
[[ -d $EASY_RESULTS_DIR ]] && backup "$EASY_RESULTS_DIR"
[[ -d $EASY_RESULTS_DIR ]] || execute mkdir -p "$EASY_RESULTS_DIR"
# Make EASY dirs expected by EASYconsole.m
dirs=('PrintResults' 'CFfigs' 'Fotos')
for d in "${dirs[@]}"; do
if [[ ! -d $EASY_RESULTS_DIR/$d ]]; then
execute mkdir -p "$EASY_RESULTS_DIR/$d"
fi
done
# Copy Templates
# NOTE(review): DrugMedia lives under SCANS_DIR while MasterPlate lives under
# EASY_RESULTS_DIR — confirm this asymmetry is intentional
declare -gx DRUG_MEDIA_FILE="$SCANS_DIR/DrugMedia_$PROJECT.xls"
declare -gx MASTER_PLATE_FILE="$EASY_RESULTS_DIR/MasterPlate_$PROJECT.xls"
execute rsync -a "$EASY_DIR"/{figs,PTmats} "$EASY_RESULTS_DIR"
# Ask the user to launch EASYconsole.m in MATLAB
# MATLAB doesn't support passing args to scripts so we have to use ENV VARS instead
# TODO will need to play with the -sd startup option to see what works (well)
# Skip this step altogether in auto mode since it requires graphical interaction
if ! ((YES)) && ask "Start EASY in MATLAB? This requires a GUI."; then
# Add EASY directory to the Matlab path
# If this does not work we can try changing the -sd argument and if that fails then pushing/popping
debug "Adding EASY directory to the Matlab path"
"$MATLAB" -nodisplay -nosplash -nodesktop -nojvm -batch "addpath('$EASY_DIR')"
# Launch matlab
# matlab -nosplash -sd "$PROJECT_SCANS_DIR" -r "run $script"
"$MATLAB" -nosplash -r "run $script"
fi
}
  1071. module ezview
  1072. # @description TODO WIP
  1073. ezview() {
  1074. debug "Running: ${FUNCNAME[0]}"
  1075. declare -gx EZVIEW_DIR="$APPS_DIR/matlab/ezview"
  1076. script="$EZVIEW_DIR/EZviewGui.m"
  1077. if ! ((YES)) && ask "Start EASY in MATLAB? This requires a GUI."; then
  1078. # Make EZview dirs
  1079. # Start EZview
  1080. "$MATLAB" -nosplash -r "run $script"
  1081. fi
  1082. }
  1083. module qhtcp
  1084. # @description System for Multi-QHTCP-Experiment Gene Interaction Profiling Analysis
  1085. #
  1086. # * Functional rewrite of REMcMaster3.sh, RemcMaster2.sh, REMcJar2.sh, ExpFrontend.m, mProcess.sh, mFunction.sh, mComponent.sh
  1087. # * Added a newline character to the end of the study info file so it is a valid text file
  1088. #
  1089. # TODO
  1090. #
  1091. # * StudiesArchive should be smarter:
  1092. # * Create a database with as much information as possible
# * Write a function that easily loads and parses the database into easy-to-use variables
  1094. # * Allow users to reference those variables to write their own modules
  1095. # * Should not be using initials
  1096. # * not unique enough and we don't have that data easily on hand
  1097. # * usernames are unique and make more sense
  1098. # * I don't know what all would have to be modified atm
  1099. #
  1100. # Rerunning this module uses rsync --update to only copy files that are newer in the template
  1101. # If you wish for the template to overwrite your changes, delete the file from your QHTCP project dir
  1102. #
  1103. # To create a new study (Experiment Specific Interaction Zscores generation)
  1104. #
  1105. # * StudyInfo.csv instructions:
  1106. # * In your files directory, open the /Code folder, edit the 'StudyInfo.csv' spreadsheet, and save it as a 'csv' file to give each experiment the labels you wish to be used for the plots and specific files.
  1107. # * Enter the desired Experiment names- **order the names in the way you want them to appear in the REMc heatmaps; and make sure to run the front end programs (below) in the correct order (e.g., run front end in 'exp1' folder to call the !!Results file for the experiment you named as exp1 in the StudyInfo.csv file)
  1108. # * The GTA and pairwise, TSHeatmaps, JoinInteractions and GTF Heatmap scripts use this table to label results and heatmaps in a meaningful way for the user and others. The BackgroundSD and ZscoreJoinSD fields will be filled automatically according to user specifications, at a later step in the QHTCP study process.
  1109. #
  1110. # * MATLAB ExpFrontend.m was made for recording into a spreadsheet ('StudiesDataArchive.txt') the date and files used (i.e., directory paths to the !!Results files used as input for Z-interaction script) for each multi-experiment study.
  1111. # Give each experiment the labels you wish to be used for the plots and specific files.
  1112. # Enter the desired Experiment names and order them in the way you want them to appear in the REMc heatmaps;
  1113. # Run the front end MATLAB programs in the correct order (e.g., run front end in 'exp1' folder to call the !!Results file for the experiment you named as exp1 in the StudyInfo.csv file)
  1114. # The GTA and pairwise, TSHeatmaps, JoinInteractions and GTF Heatmap scripts use this table to label results and heatmaps in a meaningful way for the user and others.
  1115. # The BackgroundSD and ZscoreJoinSD fields will be filled automatically according to user specifications, at a later step in the QHTCP study process.
  1116. #
  1117. # * Open MATLAB and in the application navigate to each specific /Exp folder, call and execute ExpFrontend.m by clicking the play icon.
  1118. # * Use the "Open file" function from within Matlab.
  1119. # * Do not double-click on the file from the directory.
  1120. # * When prompted, navigate to the ExpJobs folder and the PrintResults folder within the correct job folder.
  1121. # * Repeat this for every Exp# folder depending on how many experiments are being performed.
  1122. # * Note: Before doing this, it's a good idea to compare the ref and non-ref CPP average and median values. If they are not approximately equal, then may be helpful to standardize Ref values to the measures of central tendency of the Non-refs, because the Ref CPPs are used for the z-scores, which should be centered around zero.
  1123. # * This script will copy the !!ResultsStd file (located in /PrintResults in the relevant job folder in /scans **rename this !!Results file before running front end; we normally use the 'STD' (not the 'ELR' file) chosen to the Exp# directory as can be seen in the “Current Folder” column in MATLAB, and it updates 'StudiesDataArchive.txt' file that resides in the /StudiesQHTCP folder. 'StudiesDataArchive.txt' is a log of file paths used for different studies, including timestamps.
  1124. #
  1125. # Do this to document the names, dates and paths of all the studies and experiment data used in each study. Note, one should only have a single '!!Results…' file for each /Exp_ to prevent ambiguity and confusion. If you decide to use a new or different '!!Results…' sheet from what was used in a previous “QHTCP Study”, remove the one not being used. NOTE: if you copy a '!!Results…' file in by hand, it will not be recorded in the 'StudiesDataArchive.txt' file and so will not be documented for future reference. If you use the ExpFrontend.m utility it will append the new source for the raw !!Results… to the 'StudiesDataArchive.txt' file.
  1126. # As stated above, it is advantageous to think about the comparisons one wishes to make so as to order the experiments in a rational way as it relates to the presentation of plots. That is, which results from sheets and selected 'interaction … .R', user modified script, is used in /Exp1, Exp2, Exp3 and Exp4 as explained in the following section.
  1127. # TODO MUST CLEAN UP QHTCP TEMPLATE DIRECTORY
  1128. #
  1129. #
  1130. # As stated earlier, the user can add folders to back up temporary results, study-related notes, or other related work.
  1131. # However, it is advised to set up and use separate STUDIES when evaluating differing data sets whether that is from experiment results files or from differing data selections in the first interaction … .R script stage.
  1132. # This reduces confusion at the time of the study and especially for those reviewing study analysis in the future.
  1133. #
  1134. # How-To Procedure: Execute a Multi-experiment Study:
  1135. #
  1136. # * Consider the goals of the study and design a strategy of experiments to include in the study.
  1137. # * Consider the quality of the experiment runs using EZview to see if there are systematic problems that are readily detectable.
  1138. # * In some cases, one may wish to design a 'pilot' study for discovery purposes.
  1139. # * There is no problem doing that, just take a template study, copy and rename it as XYZpilotStudy etc.
  1140. # * However, careful examination of the experimental results using EZview will likely save time in the long run.
  1141. # * One may be able to relatively quickly run the interaction Z scores (the main challenge there is the user creation of customized interaction… .R code.
  1142. # * I have tried to simplify this by locating the user edits near the top).
  1143. #
  1144. #
  1145. qhtcp() {
  1146. debug "Running: ${FUNCNAME[0]}"
  1147. [[ -d $PROJECT_RESULTS_DIR ]] ||
  1148. err "$PROJECT_RESULTS_DIR does not exist, have you run the init_project module?"
  1149. # # Create studies archive file if missing
  1150. # if ! [[ -d $STUDIES_ARCHIVE_FILE ]]; then
  1151. # header=(StudyDate tStudyName StudyPath ExpNum ExpDate ExpPath ResultFile)
  1152. # printf "%s\t" "${header[@]}" > "$STUDIES_ARCHIVE_FILE"
  1153. # fi
  1154. # # TODO Add them all to StudiesDataArchive?
  1155. # # Probably better to always add and remove dupes later since each invocation "counts"?
  1156. # for f in "${EASY_RESULTS_FILES[@]}"; do
  1157. # for study in "${STUDIES[@]}"; do
  1158. # read -r num sd dir <<< "$study"
  1159. # # Trying to match old ExpFrontend formatting
  1160. # printf "%s\t" \
  1161. # "${DATE//_/}" "$PROJECT" "$PROJECT_RESULTS_DIR" "Exp$num" \
  1162. # "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
  1163. # >> "$STUDIES_ARCHIVE_FILE"
  1164. # done
  1165. # done
  1166. # Run R interactions script on all studies
  1167. calculate_interaction_zscores; exit \
  1168. && join_interaction_zscores \
  1169. && remc \
  1170. && gtf \
  1171. && gta
  1172. }
  1173. module remc
  1174. # @description remc module for QHTCP
  1175. #
  1176. # TODO
  1177. #
  1178. # * Which components can be parallelized?
  1179. #
  1180. #
  1181. # @arg $1 string study info file
  1182. remc() {
  1183. debug "Running: ${FUNCNAME[0]}"
  1184. # If any wrappers fail the rest will not run, this is fundamental to module design
  1185. # Remove leading && to run regardless
  1186. java_extract \
  1187. && r_add_shift_values \
  1188. && r_create_heat_maps \
  1189. && r_heat_maps_homology
  1190. }
  1191. module gtf
  1192. # shellcheck disable=SC2120
  1193. # @description GTF module for QHTCP
  1194. # @arg $1 string output directory
  1195. # @arg $2 string gene_association.sgd
  1196. # @arg $3 string gene_ontology_edit.obo
  1197. # @arg $4 string ORF_List_Without_DAmPs.txt
# @description Run GO Term Finder analysis over the process/function/component
# ontology branches, then compile the combined results.
#
# Globals (read): GTF_OUT_DIR
gtf() {
debug "Running: ${FUNCNAME[0]}"
# One output subdirectory per GO ontology branch (assigned without 'local')
process_dir="$GTF_OUT_DIR/process"
function_dir="$GTF_OUT_DIR/function"
component_dir="$GTF_OUT_DIR/component"
# Deconcatenate input into per-cluster txt files under process/
# (presumably — TODO confirm against py_gtf_dcon's argument order)
py_gtf_dcon \
"$process_dir" \
"$GTF_OUT_DIR"
# Reproduce the function and components dirs from the process dir
# (trailing slashes: rsync copies the *contents* of process/ into each dir)
for d in "$function_dir" "$component_dir"; do
execute rsync -a "$process_dir/" "$d/"
done
for d in "$process_dir" "$function_dir" "$component_dir"; do
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
txts=("$d"/*.txt) # glob all txt files from each dir
# NOTE(review): with no matching files the literal '*.txt' pattern is kept
# (nullglob not visibly set) and passed to the analyzers — confirm acceptable
for txt in "${txts[@]}"; do
pl_gtf_analyze "$txt"
pl_gtf_terms2tsv "$txt"
done
py_gtf_concat "$GTF_OUT_DIR" "$out_file"
done
r_compile_gtf "$GTF_OUT_DIR"
}
  1221. module gta
  1222. # shellcheck disable=SC2120
  1223. # @description GTA module for QHTCP
  1224. #
  1225. # TODO
  1226. #
  1227. # *
  1228. # *
  1229. #
  1230. # @arg $1 string output directory
  1231. # @arg $2 string gene_association.sgd
  1232. # @arg $3 string gene_ontology_edit.obo
  1233. # @arg $4 string go_terms.tab
  1234. # @arg $5 string All_SGD_GOTerms_for_QHTCPtk.csv
# @description Run GO Term Average analysis: per-experiment GTA, pairwise
# experiment comparisons, and term-specific heatmaps.
#
# Globals (read): APPS_DIR, GTA_OUT_DIR, EXP_NUMS, EXPERIMENTS,
#                 STUDY_RESULTS_DIR, STUDY_INFO_FILE
gta() {
debug "Running: ${FUNCNAME[0]}"
# gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
# TODO This could be wrong, it could be in main results
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
execute mkdir -p "$GTA_OUT_DIR"
# Loop over the array and create pairwise arrays
# NOTE(review): this loop only echoes each i<j pair and discards it — looks
# like leftover debug output since 'pair' is never used afterwards; confirm
for ((i=0; i<${#EXP_NUMS[@]}; i++)); do
for ((j=i+1; j<${#EXP_NUMS[@]}; j++)); do
pair=("${EXP_NUMS[i]}" "${EXP_NUMS[j]}")
echo "${pair[@]}"
done
done
# Create unique pairwise combinations of study nums from dir names
# NOTE(review): this builds ORDERED pairs (both "a,b" and "b,a"), so every
# pairwise comparison below is run in both directions — confirm intended
exp_combos=()
for ((i=0; i<${#EXP_NUMS[@]}; i++)); do
# Loop through the array again
for ((j=0; j<${#EXP_NUMS[@]}; j++)); do
# If the indices are not the same
if [ "$i" != "$j" ]; then
# Print the unique combination
exp_combos+=("${EXP_NUMS[i]},${EXP_NUMS[j]}")
fi
done
done
# The following are three types of studies
# Individual studies: one GTA run per experiment
for exp_num in "${EXP_NUMS[@]}"; do
execute mkdir -p "$GTA_OUT_DIR/exp$exp_num"
r_gta "exp$exp_num" "$STUDY_RESULTS_DIR/exp$exp_num/zscores/zscores_interaction.csv"
done
# Combination studies (for pairwise comparisons)
for combo in "${exp_combos[@]}"; do
# Split on comma and assign to array
IFS=',' read -ra exps <<< "$combo"
r_gta_pairwiselk \
"${EXPERIMENTS[exp${exps[0]},name]}" \
"$GTA_OUT_DIR/exp${exps[0]}/Average_GOTerms_All.csv" \
"${EXPERIMENTS[exp${exps[1]},name]}" \
"$GTA_OUT_DIR/exp${exps[1]}/Average_GOTerms_All.csv" \
"$GTA_OUT_DIR"
done
# Term-specific heatmaps across all experiments
r_gta_heatmaps \
"$STUDY_INFO_FILE" \
"$gene_ontology_obo" \
"$sgd_terms_tfile" \
"$all_sgd_terms_csv" \
"$STUDY_RESULTS_DIR" \
"$STUDY_RESULTS_DIR/TermSpecificHeatmaps" \
"${EXP_NUMS[@]}"
}
  1289. # @section Wrappers
  1290. # @description
  1291. #
  1292. # Wrappers:
  1293. #
  1294. # * Allow scripts to be called by the main workflow script using input and output arguments as a translation mechanism.
  1295. # * Only run by default if called by a module.
  1296. # * Can be called directly with its arguments as a comma-separated string
  1297. #
  1298. # @description
  1299. wrapper() {
  1300. debug "Adding $1 wrapper"
  1301. ALL_WRAPPERS+=("$1")
  1302. declare -gA "$1"
  1303. }
  1304. wrapper calculate_interaction_zscores
  1305. # @description Run the R interactions analysis (deprecates Z_InteractionTemplate.R)
  1306. # shellcheck disable=SC2120
  1307. #
  1308. # SCRIPT: apps/r/calculate_interaction_zscores.R
  1309. #
  1310. # TODO
  1311. #
  1312. # * More variables can be read in from the config file to allow more configuration
  1313. # * sd thresholds
  1314. # * lm thresholds
  1315. # * interaction thresholds
  1316. # * reference gene(s)
  1317. # * background genes
  1318. # * Add gene names, other threshold values, etc.
  1319. # * Dataframe columns and output file columns should be standardized in calculate_interactions()
  1320. # * Need to decide if conc_num_factor is numeric or factor
  1321. # * Do pdfs really need to be all different sizes?
  1322. # * We are using standard error bars using the same se values as the data now (includes Bessel's correction)
  1323. # * Plate analysis error bars and some others will be slightly different
  1324. # * Can be changed back but better to have plots reflect data, no?
  1325. # * Dynamically generate axis limits based on data (if desired)
  1326. # * Parallelize interaction plotting
  1327. #
  1328. # INPUT
  1329. #
  1330. # * easy/results_std.txt
  1331. #
  1332. # OUTPUT
  1333. #
  1334. # * zscores/zscores_interaction.csv
  1335. # * etc.
  1336. #
  1337. # NOTES
  1338. #
  1339. # *
  1340. #
  1341. # @arg $1 integer output directory
  1342. # @arg $2 string SGD_features.tab
  1343. # @arg $3 string easy/results_std.txt
  1344. # @arg $6 array triplets of experiment paths, names, sd threshold factor
  1345. calculate_interaction_zscores() {
  1346. debug "Running: ${FUNCNAME[0]} $*"
  1347. cat <<-EOF
  1348. * Be sure to enter Background noise filter standard deviation i.e., 3 or 5 per Sean
  1349. * Enter Standard deviation value for removing data for cultures due to high background (e.g., contaminated cultures).
  1350. * Generally set this very high (e.g., '20') on the first run in order NOT to remove data, e.g. '20'. Review QC data and inspect raw image data to decide if it is desirable to remove data, and then rerun analysis.
  1351. * Enter a Background SD threshold for EXCLUDING culture data from further analysis:
  1352. * This Background value removes data where there is high pixel intensity in the background regions of a spot culture (i.e., suspected contamination).
  1353. * 5 is a minimum recommended value, because lower values result in more data being removed, and often times this is undesirable if contamination occurs late after the carrying capacity of the yeast culture is reached.
  1354. * This is most often "trial and error", meaning there is a 'Frequency_Delta_Background.pdf' report in the /Exp_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice).
  1355. * In general, err on the high side, with BSD of 10 or 12…. One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value.
  1356. * Background values are reported in the results sheet and so could also be analyzed there.
  1357. EOF
  1358. declare script="$APPS_DIR/r/calculate_interaction_zscores.R"
  1359. declare -a out_paths=("${1:-"$STUDY_RESULTS_DIR/zscores"}")
  1360. for path in "${EXP_PATHS[@]}"; do
  1361. out_paths+=("${path}/zscores")
  1362. done
  1363. # TODO we'll need to change this behaviour after testing
  1364. # we can test for existence and then choose to skip or rerun later
  1365. # possibly handle in the module?
  1366. for out_path in "${out_paths[@]}"; do
  1367. backup "$out_path"
  1368. execute mkdir -p "$out_path" "$out_path/qc"
  1369. done
  1370. execute "$RSCRIPT" "$script" \
  1371. "${1:-"$STUDY_RESULTS_DIR"}" \
  1372. "${2:-"$APPS_DIR/r/SGD_features.tab"}" \
  1373. "${3:-"$EASY_RESULTS_DIR/results_std.txt"}" \
  1374. "${@:4}" \
  1375. "${EXP_PATHS_AND_NAMES_AND_SD_FACTORS[@]}"
  1376. }
  1377. wrapper join_interaction_zscores
  1378. # shellcheck disable=SC2120
  1379. # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
  1380. #
  1381. # TODO
  1382. #
  1383. # * Needs more loops to reduce verbosity
  1384. #
  1385. # INPUT
  1386. #
  1387. # * /out/PROJECT/STUDY/exp#/zscores/zscores_interaction.csv
  1388. #
  1389. # OUTPUT
  1390. #
  1391. # * combined_zscores.csv (REMcRdy_lm_only.csv)
  1392. # * combined_summary_stats (Shift_only.csv)
  1393. # * final_combined_report (parameters.csv)
  1394. #
  1395. # @arg $1 string output directory
  1396. # @arg $2 string sd value (default: 2)
  1397. # @arg $3 array pairs of experiment paths and names
  1398. join_interaction_zscores() {
  1399. debug "Running: ${FUNCNAME[0]} $*"
  1400. declare script="$APPS_DIR/r/join_interaction_zscores.R"
  1401. declare -a out_files=(
  1402. "${1:-$STUDY_RESULTS_DIR}/combined_zscores.csv"
  1403. "${1:-$STUDY_RESULTS_DIR}/combined_summary_stats.csv"
  1404. "${1:-$STUDY_RESULTS_DIR}/final_combined_report.csv"
  1405. )
  1406. # ((DEBUG)) && declare -p # when the going gets tough
  1407. execute "$RSCRIPT" "$script" \
  1408. "${1:-$STUDY_RESULTS_DIR}" \
  1409. "${2:-2}" \
  1410. "${@:3:}" \
  1411. "${EXP_PATHS_AND_NAMES[@]}"
  1412. for f in "${out_files[@]}"; do
  1413. [[ -f $f ]] || { echo "$f does not exist"; return 1; }
  1414. done
  1415. }
  1416. wrapper r_gta
  1417. # @description GTAtemplate R script
  1418. #
  1419. # TODO
  1420. #
  1421. # * Is GTAtemplate.R actually a template?
  1422. # * Do we need to allow user customization?
  1423. #
  1424. # INPUT
  1425. #
  1426. # * [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd)
  1427. # * go_terms.tab
  1428. #
  1429. # OUTPUT
  1430. #
  1431. # * Average_GOTerms_All.csv
  1432. #
  1433. #
  1434. # @arg $1 string exp# name (required)
  1435. # @arg $2 string zscores_interaction.csv (required)
  1436. # @arg $3 string go_terms.tab file
  1437. # @arg $4 string [gene_association.sgd](https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd)
  1438. # @arg $5 string output directory
  1439. r_gta() {
  1440. debug "Running: ${FUNCNAME[0]} $*"
  1441. cat <<-EOF
  1442. EOF
  1443. script="$APPS_DIR/r/gtaTemplate.R"
  1444. out_file="${5:-"$GTA_OUT_DIR"}/Average_GOTerms_All.csv"
  1445. execute "$RSCRIPT" "$script" \
  1446. "$1" \
  1447. "$2" \
  1448. "${3:-"$APPS_DIR/r/go_terms.tab"}" \
  1449. "${4:-"$APPS_DIR/r/gene_association.sgd"}" \
  1450. "${5:-"$GTA_OUT_DIR"}" \
  1451. "${@:6}" # future arguments
  1452. [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
  1453. }
  1454. wrapper r_gta_pairwiselk
  1455. # @description PairwiseLK.R R script
  1456. #
  1457. # TODO
  1458. #
  1459. # * Move directory creation from PairwiseLK.R to gta module
  1460. # * Needs better output filenames and directory organization
  1461. # * Needs more for looping to reduce verbosity
  1462. #
  1463. # INPUT
  1464. #
  1465. # * Average_GOTerms_All.csv
  1466. # *
  1467. #
  1468. # OUTPUT
  1469. #
  1470. # *
  1471. #
  1472. # This wrapper:
  1473. #
  1474. # * Will perform both L and K comparisons for the specified experiment folders.
  1475. # * The code uses the naming convention of PairwiseCompare_Exp’#’-Exp’#’ to standardize and keep simple the structural naming (where ‘X’ is either K or L and ‘Y’ is the number of the experiment GTA results to be found in ../GTAresult/Exp_).
  1476. # * {FYI There are also individual scripts that just do the ‘L’ or ‘K’ pairwise studies in the ../Code folder.}
  1477. #
  1478. # @arg $1 string First exp# name (required)
  1479. # @arg $2 string First exp# go terms file (required)
  1480. # @arg $3 string Second exp# name (required)
  1481. # @arg $4 string Second exp# go terms file (required)
  1482. # @arg $5 string output directory
  1483. #
  1484. r_gta_pairwiselk() {
  1485. debug "Running: ${FUNCNAME[0]} $*"
  1486. cat <<-EOF
  1487. EOF
  1488. script="$APPS_DIR/r/calculate_pairwise_lk.R"
  1489. execute "$RSCRIPT" "$script" \
  1490. "$1" \
  1491. "$2" \
  1492. "$3" \
  1493. "$4" \
  1494. "${5:-"$GTA_OUT_DIR"}" \
  1495. "${@:6}" # future arguments
  1496. }
  1497. wrapper r_gta_heatmaps
  1498. # @description TSHeatmaps5dev2.R R script
  1499. #
  1500. # TODO
  1501. #
  1502. # * Rename
  1503. # * Refactor to automatically allow more studies
  1504. # * Refactor with more looping to reduce verbosity
  1505. # * Reduce cyclomatic complexity of some of the for loops
  1506. #
  1507. # Files
  1508. #
  1509. # *
  1510. # *
  1511. #
  1512. # Output
  1513. #
  1514. # *
  1515. #
  1516. # This wrapper:
  1517. #
  1518. # * The Term Specific Heatmaps are produced directly from the ../ExpStudy/Exp_/ZScores/ZScores_Interaction.csv file generated by the user modified interaction… .R script.
  1519. # * The heatmap labeling is per the names the user wrote into the study info file
  1520. # * Verify that the All_SGD_GOTerms_for_QHTCPtk.csv found in ../Code is what you wish to use or if you wish to use a custom modified version.
  1521. # * If you wish to use a custom modified version, create it and modify the TSHeatmaps template script (TSHeatmaps5dev2.R) and save it as a ‘TSH_study specific name’.
  1522. #
  1523. # @arg $1 string study info file
  1524. # @arg $2 string gene_ontology_edit.obo
  1525. # @arg $3 string go_terms.tab
  1526. # @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
  1527. # @arg $5 string base directory
  1528. # @arg $6 string output directory
  1529. #
  1530. r_gta_heatmaps() {
  1531. debug "Running: ${FUNCNAME[0]} $*"
  1532. cat <<-EOF
  1533. EOF
  1534. script="$APPS_DIR/r/TSHeatmaps5dev2.R"
  1535. [[ -d $7 ]] || execute mkdir -p "$7"
  1536. execute "$RSCRIPT" "$script" \
  1537. "${1:-$STUDY_INFO_FILE}" \
  1538. "${2:-"$APPS_DIR/r/gene_ontology_edit.obo"}" \
  1539. "${3:-"$APPS_DIR/r/go_terms.tab"}" \
  1540. "${4:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" \
  1541. "${5:-"$STUDY_RESULTS_DIR"}" \
  1542. "${6:-"$STUDY_RESULTS_DIR/TermSpecificHeatmaps"}" \
  1543. "${@:7}" # studies
  1544. }
  1545. wrapper java_extract
  1546. # shellcheck disable=SC2120
  1547. # @description Jingyu's REMc java utility
  1548. #
  1549. # TODO
  1550. #
  1551. # * Closed-source w/ hardcoded output directory, so have to pushd/popd to run (not ideal)
  1552. #
  1553. # INPUT
  1554. #
  1555. # * study_dir/combined_zscores.csv (REMcRdy_lm_only.csv)
  1556. #
  1557. # OUTPUT
  1558. #
  1559. # * study_dir/combined_zscores_final.csv (REMcRdy_lm_only.csv-finalTable.csv)
  1560. #
  1561. # @arg $1 string output directory
  1562. # @arg $2 string combined_zscores.csv
  1563. # @arg $3 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab
  1564. # @arg $4 string ORF_List_Without_DAmPs.txt
  1565. # @exitcode 0 if expected output file exists
  1566. # @exitcode 1 if expected output file does not exist
  1567. java_extract() {
  1568. debug "Running: ${FUNCNAME[0]}"
  1569. classpath="$APPS_DIR/java/weka-clustering/weka-clustering.jar"
  1570. output_file="${1:-$STUDY_RESULTS_DIR}/combined_zscores_final.csv"
  1571. [[ -f $output_file ]] && backup "$output_file"
  1572. java_cmd=(
  1573. "$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8
  1574. -classpath "$classpath" ExecMain
  1575. "${2:-"$STUDY_RESULTS_DIR/combined_zscores.csv"}"
  1576. "${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
  1577. "${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
  1578. 1
  1579. true
  1580. )
  1581. debug "pushd && ${java_cmd[*]} && popd"
  1582. pushd "${1:-$STUDY_RESULTS_DIR}" && "${java_cmd[@]}" && popd || return 1
  1583. [[ -f $output_file ]]
  1584. }
  1585. wrapper r_add_shift_values
  1586. # shellcheck disable=SC2120
  1587. # @description Add shift values back to REMcRdy_lm_only.csv-finalTable.csv
  1588. # and output "REMcWithShift.csv" for use with the REMc heat maps
  1589. #
  1590. # @arg $1 string REMcRdy_lm_only.csv-finalTable.csv
  1591. # @arg $2 string Shift_only.csv
  1592. # @arg $3 string study info file
  1593. # @arg $4 string REMcWithShift.csv
# @description Run addShiftVals.R to merge shift values back into the REMc
# final table, producing REMcWithShift.csv, and verify the output exists.
#
# @arg $1 string REMcRdy_lm_only.csv-finalTable.csv
# @arg $2 string Shift_only.csv
# @arg $3 string study info file
# @arg $4 string REMcWithShift.csv (output)
r_add_shift_values() {
debug "Running: ${FUNCNAME[0]} $*"
script="$APPS_DIR/r/addShiftVals.R"
execute "$RSCRIPT" "$script" \
"${1:-"$STUDY_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"}" \
"${2:-"$STUDY_RESULTS_DIR/Shift_only.csv"}" \
"${3:-$STUDY_INFO_FILE}" \
"${4:-"$STUDY_RESULTS_DIR/REMcWithShift.csv"}" \
"${@:5}" # future arguments
# Presumably clears stale heatmap pages so the downstream pdftk compilation in
# r_create_heat_maps only picks up freshly generated PDFs — TODO confirm
rm -f "$STUDY_RESULTS_DIR/REMcHeatmaps/"*.pdf # TODO why?
out_file="${4:-"$STUDY_RESULTS_DIR/REMcWithShift.csv"}"
[[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
}
  1607. wrapper r_create_heat_maps
  1608. # shellcheck disable=SC2120
  1609. # @description Execute createHeatMaps.R
  1610. #
  1611. # INPUT
  1612. #
  1613. # * REMcWithShift.csv
  1614. #
  1615. # OUTPUT
  1616. #
  1617. # * compiledREMcHeatmaps.pdf
  1618. #
  1619. # TODO
  1620. #
  1621. # * Needs more looping for brevity
  1622. #
  1623. #
  1624. #
  1625. # @arg $1 string The final shift table (REMcWithShift.csv)
  1626. # @arg $2 string The output directory
  1627. r_create_heat_maps() {
  1628. debug "Running: ${FUNCNAME[0]} $*"
  1629. script="$APPS_DIR/r/createHeatMaps.R"
  1630. execute "$RSCRIPT" "$script" \
  1631. "${1:-"$STUDY_RESULTS_DIR/REMcWithShift.csv"}" \
  1632. "${2:-"$STUDY_RESULTS_DIR"}" \
  1633. "${@:3}" # future arguments
  1634. pdfs=(REMcHeatmaps/*.pdf)
  1635. execute pdftk "${pdfs[@]}" output "$out_file"
  1636. out_file="$2/compiledREMcHeatmaps.pdf"
  1637. [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
  1638. }
  1639. wrapper r_heat_maps_homology
  1640. # shellcheck disable=SC2120
  1641. # @description Execute createHeatMapsHomology.R
  1642. #
  1643. # @arg $1 string output directory
  1644. # @arg $2 string REMcRdy_lm_only.csv-finalTable.csv
  1645. # @arg $3 string 170503_DAmPs_Only.txt
  1646. # @arg $4 string (Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv)
  1647. r_heat_maps_homology() {
  1648. debug "Running: ${FUNCNAME[0]} $*"
  1649. script="$APPS_DIR/r/createHeatMapsHomology.R"
  1650. out_file="${1:-"$STUDY_RESULTS_DIR/homology"}/compiledREMcHomologyHeatmaps.pdf"
  1651. debug "Removing old pdf and csv files from ${1:-"$STUDY_RESULTS_DIR/homology"}"
  1652. rm "${1:-"$STUDY_RESULTS_DIR/homology"}/"*.{pdf,csv}
  1653. execute "$RSCRIPT" "$script" \
  1654. "${1:-"$STUDY_RESULTS_DIR/homology"}" \
  1655. "${2:-"$STUDY_RESULTS_DIR/REMcRdy_lm_only.csv-finalTable.csv"}" \
  1656. "${3:-"$APPS_DIR/r/170503_DAmPs_Only.txt"}" \
  1657. "${4:-"$APPS_DIR/r/Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv"}" \
  1658. "${@:5}" # future arguments
  1659. pdfs=("${1:-"$STUDY_RESULTS_DIR/homology"}"/*.pdf)
  1660. execute pdftk "${pdfs[@]}" output "$out_file"
  1661. [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
  1662. }
  1663. wrapper py_gtf_dcon
  1664. # @description Perform python dcon portion of GTF
  1665. #
  1666. # SCRIPT: [DconJG2.py](apps/python/DconJG2.py)
  1667. #
  1668. # OUTPUT
  1669. #
  1670. # * 1-0-0-finaltable.csv
  1671. #
  1672. # @arg $1 string Directory to process
  1673. # @arg $2 string Output directory name
  1674. py_gtf_dcon() {
  1675. debug "Running: ${FUNCNAME[0]} $*"
  1676. script="$APPS_DIR/python/DconJG2.py"
  1677. execute "$PYTHON" "$script" \
  1678. "$1" \
  1679. "$2/" \
  1680. "${@:3}" # future arguments
  1681. out_file="$2/1-0-0-finaltable.csv"
  1682. [[ -f $out_file ]] || { echo "$out_file does not exist"; return 1; }
  1683. }
  1684. wrapper pl_gtf_analyze
  1685. # @description Perl analyze wrapper
  1686. #
  1687. # SCRIPT: [analyze_v2.pl](https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl)
  1688. #
  1689. # TODO
  1690. #
  1691. # * Are we just overwriting the same data for all set2 members?
  1692. # * Why the custom version?
  1693. #
  1694. # @arg $1 string txt to analyze (required)
  1695. # @arg $2 string gene_association.sgd
  1696. # @arg $3 string gene_ontology_edit.obo
  1697. # @arg $4 string ORF_List_Without_DAmPs.txt
  1698. pl_gtf_analyze() {
  1699. debug "Running: ${FUNCNAME[0]} $*"
  1700. script="$APPS_DIR/perl/analyze_v2.pl"
  1701. execute "$PERL" "$script" \
  1702. "-an" "${2:-"$APPS_DIR/r/gene_association.sgd"}" \
  1703. "-as" "P" \
  1704. "-o" "${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" \
  1705. "-b" "${4:-"$APPS_DIR/r/ORF_List_Without_DAmPs.txt"}" \
  1706. "$1"
  1707. }
  1708. wrapper pl_gtf_terms2tsv
  1709. # @description Perl terms2tsv wrapper
  1710. #
  1711. # TODO
  1712. #
  1713. # * Probably should be translated to shell/python
  1714. #
  1715. # @arg $1 string Terms file TODO naming
  1716. pl_gtf_terms2tsv() {
  1717. debug "Running: ${FUNCNAME[0]} $*"
  1718. script="$APPS_DIR/perl/terms2tsv.pl"
  1719. debug "$PERL $script $1.terms > $1.tsv"
  1720. "$PERL" "$script" "$1.terms" > "$1.tsv"
  1721. }
  1722. wrapper py_gtf_concat
  1723. # @description Python concat wrapper for GTF
  1724. # Concat the process ontology outputs from the /REMcReady_lm_only folder
  1725. #
  1726. # TODO
  1727. #
  1728. # * Probably should be translated to bash
  1729. #
  1730. # @arg $1 string output directory name to look for txt files
  1731. # @arg $2 string output file
  1732. py_gtf_concat() {
  1733. debug "Running: ${FUNCNAME[0]} $*"
  1734. script="$APPS_DIR/python/concatGTFResults.py"
  1735. execute "$PYTHON" "$script" "$1/" "$2"
  1736. [[ -f $2 ]] || { echo "$2 does not exist"; return 1; }
  1737. }
  1738. wrapper r_compile_gtf
  1739. # @description Compile GTF in R
  1740. # @arg $1 string gtf output directory
  1741. r_compile_gtf() {
  1742. debug "Running: ${FUNCNAME[0]} $*"
  1743. script="$APPS_DIR/r/CompileGTF.R"
  1744. execute "$RSCRIPT" "$script" "$1"
  1745. }
# @description Selects a results directory
# @exitcode 0 if successfully chose a results dir
# @set EASY_RESULTS_DIR string The working EASY output directory
# @arg $1 string directory containing results dirs matching the prefix regex
# @arg $2 string variable name to declare
#
# Behavior: in auto mode (YES set) silently picks the only/latest candidate;
# otherwise asks the user. The chosen (or newly created) directory path is
# exported under the variable name given in $2.
handle_results_dir() {
debug "Running: ${FUNCNAME[0]} $*"
declare results_dir
# Candidates must look like YYYYMMDD_<user>_<name> (8-digit date prefix)
declare -a results_dirs=("$1"/[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]_*_*/) # glob for results directories
results_dirs=("${results_dirs[@]%/}") # remove trailing slashes
# Filter directory names
declare -a filtered_dirs=()
for dir in "${results_dirs[@]}"; do
base_dir="${dir##*/}" # get basename
if sanitize_pn "$base_dir"; then
filtered_dirs+=("$dir")
fi
done
# Sort filtered directories by date prefix in case the glob was mixed
mapfile -t results_dirs < <(printf '%s\n' "${filtered_dirs[@]}" | sort)
num=${#results_dirs[@]}
((DEBUG)) && print_in_columns "results_dirs"
# Choose a results dir based on number of results dirs
if [[ $num -eq 1 ]] && ((YES)); then
results_dir="${results_dirs[0]}"
elif ((YES)); then
# Auto mode with several candidates: the sorted-last one is the latest
results_dir="${results_dirs[-1]}"
else
# NOTE(review): ask_name runs before the selection menu below and replaces
# the globbed candidates with ADD_RESULTS, while $num still reflects the
# pre-replacement count — confirm this ordering is intended
ask_name "$2" ADD_RESULTS
results_dirs=("${ADD_RESULTS[@]}")
if [[ $num -eq 1 ]]; then
results_dir="${results_dirs[0]}"
else
# Print results dirs
[[ ${#results_dirs[@]} -gt 0 ]] || { err "No ${2}s found"; return 1; }
print_in_columns "results_dirs"
# Results selection prompt
cat <<-EOF
${underline}Select $2 by number${nounderline}
* <Enter> for the latest $2 ($num)
* 0 to create a new $2
EOF
read -r -p "($num): " response
echo
# <Enter> defaults to the latest entry; 0 creates a fresh one
response="${response:-$num}"
if [[ $response -eq 0 ]]; then
ask_name "$2" ADD_RESULTS
results_dirs=("${ADD_RESULTS[@]}")
results_dir="${results_dirs[-1]}"
else
# Menu numbering is 1-based, the array is 0-based
results_dir="${results_dirs[$((response-1))]}"
fi
fi
fi
# Set the fallback
[[ -z $results_dir ]] && results_dir="$1/${STUDY_PREFIX}_${PROJECT_NAME}"
# Create directory and set global variable
[[ -d $results_dir ]] || execute mkdir -p "$results_dir"
declare -gx "$2"="$results_dir"
}
  1806. module generate_markdown
  1807. # @description Generates shdoc markdown from this script
  1808. # @noargs
  1809. # @internal
  1810. generate_markdown() {
  1811. debug "Running: ${FUNCNAME[0]}"
  1812. # Print markdown to stdout
  1813. ((DEBUG)) && shdoc < "$SCRIPT"
  1814. # Create markdown file
  1815. shdoc < "$SCRIPT" > README.md
  1816. }
# @description Parses a simple "bashy" config file
# @arg $1 study config file
# @internal
#
# Creates a default config when none exists, sources it, prints it, optionally
# lets the user edit it, then derives the EXP_* helper arrays from the
# EXPERIMENTS associative array. Runs at most once per invocation
# (guarded by PARSED_CONFIG).
handle_config() {
debug "Running: ${FUNCNAME[0]} $*"
# Determine the configuration file path
declare config_file="${1:-${STUDY_CONFIG_FILE:-"$STUDY_RESULTS_DIR/study_config"}}"
declare config_array_name="EXPERIMENTS"
# Ensure the function runs only once per project (in case of multiple modules)
(( PARSED_CONFIG )) && return 0
declare -g PARSED_CONFIG=1
# Default experiment settings for a study
declare -gA default_study_config
# shellcheck disable=SC2034
# Nested helper: seed default_study_config for experiment number $1.
# Optional $2..$10 override name, path, sd, sd_L, sd_K, sd_pair_L,
# sd_pair_K, gene_exclude, gene_include respectively.
add_experiment() {
echo "Adding default Exp $1 to the study config file"
default_study_config["exp$1,name"]="${2:-"Experiment $1 for ${STUDY_RESULTS_DIR##*/} study"}"
default_study_config["exp$1,path"]="${3:-"$STUDY_RESULTS_DIR/exp$1"}"
default_study_config["exp$1,sd"]="${4:-3}"
default_study_config["exp$1,sd_L"]="${5:-0}"
default_study_config["exp$1,sd_K"]="${6:-0}"
default_study_config["exp$1,sd_pair_L"]="${7:-0}"
default_study_config["exp$1,sd_pair_K"]="${8:-0}"
default_study_config["exp$1,gene_exclude"]="${9:-"YDL227C"}"
default_study_config["exp$1,gene_include"]="${10:-0}"
}
# Check if the config file exists
if ! [[ -f $config_file ]]; then
declare -a exp_dirs
[[ -n $1 ]] && dirname="${1%/*}"
# Discover existing expN directories next to the config file
exp_dirs=("${dirname:-$STUDY_RESULTS_DIR}/exp"[0-9]*/)
exp_dirs=("${exp_dirs[@]%/}")
# NOTE(review): without nullglob an unmatched glob stays literal, so this
# count is >0 even when no exp dirs exist — confirm nullglob is set elsewhere
if [[ ${#exp_dirs[@]} -gt 0 ]]; then
for exp_dir in "${exp_dirs[@]}"; do
# Derive the experiment number from the "expN" directory basename
num="${exp_dir##*/}"
num="${num#exp}"
add_experiment "$num"
done
else
echo "No config file found at $config_file"
echo "No experiment directories found in $STUDY_RESULTS_DIR"
if ask "Would you like to create a study config file at $config_file?"; then
# Auto mode (YES) keeps the default of 4 experiments
((YES)) || read -r -p "Number of experiments to create (4): " response
response="${response:-4}"
for ((i = 1; i <= response; i++)); do
add_experiment "$i"
done
fi
fi
# Persist the defaults so subsequent runs can just source the file
write_config "$config_file" "default_study_config" "$config_array_name" &&
echo "Configuration file generated at $config_file"
fi
# Source the config file for the config array
# shellcheck disable=SC1090
. "$config_file"
# Declare a nameref to refer to the actual array using the variable name
declare -n config_array_ref="$config_array_name"
# Bail out if the sourced file did not define any keys in the array
[[ -z ${!config_array_ref[*]} ]] && { err "No $config_array_name array found in $config_file" && return 1; }
# Render each key/value pair as an assignment line for display
for key in "${!config_array_ref[@]}"; do
IFS=',' read -r main_key sub_key <<< "$key"
lines+=("${config_array_name}[$main_key,$sub_key]=\"${config_array_ref[$key]}\"")
done
# Associative-array iteration order is unspecified; sort for stable output
mapfile -t lines < <(printf "%s\n" "${lines[@]}" | sort)
echo "${underline}$config_file${nounderline}"
printf "%s\n" "${lines[@]}"
unset lines
# Prompt user to edit study config file if not in auto mode
if ! ((YES)) && ask "Would you like to edit the study config file?"; then
"$EDITOR" "$config_file"
# shellcheck disable=SC1090
. "$config_file"
fi
# Create some helpful arrays
declare -ga EXP_PATHS
# Loop over the keys in the associative array
for key in "${!config_array_ref[@]}"; do
if [[ $key == *,path ]]; then
EXP_PATHS+=("${config_array_ref[$key]}")
fi
done
# Trailing digits of each path become the experiment numbers
declare -ga EXP_NUMS=("${EXP_PATHS[@]##*[!0-9]}")
# Flat (path, name) pairs for consumers that want both
declare -ga EXP_PATHS_AND_NAMES
for key in "${!config_array_ref[@]}"; do
if [[ $key == *,path ]]; then
main_key="${key%,*}"
name_key="${main_key},name"
if [[ -n ${config_array_ref[$name_key]} ]]; then
EXP_PATHS_AND_NAMES+=("${config_array_ref[$key]}" "${config_array_ref[$name_key]}")
fi
fi
done
# Flat (path, name, sd) triples
declare -ga EXP_PATHS_AND_NAMES_AND_SD_FACTORS
for key in "${!config_array_ref[@]}"; do
if [[ $key == *,path ]]; then
main_key="${key%,*}"
name_key="${main_key},name"
sd_key="${main_key},sd"
if [[ -n ${config_array_ref[$name_key]} ]]; then
EXP_PATHS_AND_NAMES_AND_SD_FACTORS+=(
"${config_array_ref[$key]}"
"${config_array_ref[$name_key]}"
"${config_array_ref[$sd_key]}")
fi
fi
done
# declare -ga EXP_PATHS_AND_NAMES
# for key in "${!config_array_ref[@]}"; do
# if [[ $key == *,path ]]; then
# EXP_PATHS_AND_NAMES+=("${config_array_ref[$key]}")
# fi
# if [[ $key == *,name ]]; then
# EXP_PATHS_AND_NAMES+=("${config_array_ref[$key]}")
# fi
# done
return 0
}
  1933. # @description Write an associative array to a config file
  1934. # @arg $1 file to write
  1935. # @arg $2 name of the associative array
  1936. # @arg $3 name of the config array
  1937. write_config() {
  1938. debug "Running: ${FUNCNAME[0]} $*"
  1939. declare file="$1"
  1940. declare -n array="$2"
  1941. declare array_name="${3:-"EXPERIMENTS"}"
  1942. declare -a lines=()
  1943. # Iterate over the associative array to populate the lines array
  1944. for key in "${!array[@]}"; do
  1945. IFS=',' read -r main_key sub_key <<< "$key"
  1946. lines+=("${array_name}[$main_key,$sub_key]=\"${array[$key]}\"")
  1947. done
  1948. mapfile -t lines < <(printf "%s\n" "${lines[@]}" | sort) # re-sort the lines alphabetically
  1949. # Write the contents to the file using a HEREDOC
  1950. cat <<- EOF > "$file"
  1951. #!/usr/bin/env bash
  1952. # Declare the main associative array
  1953. declare -Ax ${array_name}
  1954. $(printf "%s\n" "${lines[@]}")
  1955. EOF
  1956. }
  1957. # @description The main loop of qhtcp-workflow
  1958. #
  1959. # @internal
  1960. main() {
  1961. debug "Running: ${FUNCNAME[0]} $*"
  1962. # Libraries
  1963. declare -g JAVA="${JAVA:-$(which java 2>/dev/null || echo java)}"
  1964. declare -g PYTHON="${PYTHON:-$(which python3 2>/dev/null || echo python)}"
  1965. declare -g PERL="${PERL:-$(which perl 2>/dev/null || echo perl)}"
  1966. declare -g RSCRIPT="${RSCRIPT:-$(which Rscript 2>/dev/null || echo Rscript)}"
  1967. declare -g MATLAB="${MATLAB:-$(which matlab 2>/dev/null || echo matlab)}"
  1968. declare -g EDITOR="${EDITOR:-"nano"}"
  1969. # Global vars
  1970. SCRIPT_NAME="${BASH_SOURCE[0]##*/}"
  1971. SCRIPT=$(realpath -s "${BASH_SOURCE[0]}")
  1972. SCRIPT_DIR=$(dirname "$SCRIPT")
  1973. APPS_DIR="$SCRIPT_DIR/apps"
  1974. # TEMPLATES_DIR="$SCRIPT_DIR/templates"
  1975. USER="${USER:-$(whoami)}"
  1976. DATE="$(date +%Y%m%d)" # change in EASYconsole.m to match 'hardcode'
  1977. # Find a scans directory
  1978. # local scans_heirarchy=("./scans" "/mnt/data/scans" "/mnt/data/ExpJobs" "./scans")
  1979. local scans_heirarchy=(
  1980. "$SCRIPT_DIR/scans"
  1981. "/mnt/data/scans"
  1982. "$SCRIPT_DIR/templates/scans-demo"
  1983. )
  1984. # Find an existing scans dir if SCANS_DIR is not set
  1985. if [[ -z $SCANS_DIR ]]; then
  1986. for d in "${scans_heirarchy[@]}"; do
  1987. if [[ -d $d ]]; then
  1988. declare -gx SCANS_DIR="$d"
  1989. break
  1990. fi
  1991. declare -gx SCANS_DIR="${scans_heirarchy[0]}"
  1992. done
  1993. fi
  1994. if ! [[ -d $SCANS_DIR ]]; then
  1995. # This is not something we do often, so ask
  1996. if ask "Create the scans directory: $SCANS_DIR?"; then
  1997. execute mkdir -p "$SCANS_DIR"
  1998. else
  1999. echo "No scans directory available, exiting"
  2000. exit 1;
  2001. fi
  2002. fi
  2003. # Make sure we are using the absolute path
  2004. SCANS_DIR=$(realpath -s "$SCANS_DIR")
  2005. # Find an output directory
  2006. local out_heirarchy=("${SCANS_DIR%/*}/out" "$SCRIPT_DIR/out" "/mnt/data/out")
  2007. for d in "${out_heirarchy[@]}"; do
  2008. if [[ -d $d ]]; then
  2009. debug "Using output directory: $d"
  2010. declare -g OUT_DIR="$d"
  2011. break
  2012. fi
  2013. done
  2014. if [[ -z $OUT_DIR ]]; then
  2015. echo "No output directory found"
  2016. declare -gx OUT_DIR="$SCRIPT_DIR/out"
  2017. # This is not something we do often, so ask
  2018. if ask "Create $SCRIPT_DIR/out?"; then
  2019. execute mkdir -p "$SCRIPT_DIR/out"
  2020. else
  2021. err "No output directory, attempting to continue..."
  2022. fi
  2023. fi
  2024. # Make sure we are using the absolute path
  2025. OUT_DIR=$(realpath -s "$OUT_DIR")
  2026. declare -ag PROJECTS=() # this array will hold all of the projects for this run
  2027. parse_input "$@" # parse arguments with getopt
  2028. # ((DEBUG)) && declare -p # when the going gets rough
  2029. interactive_header "$@"
  2030. for i in "${!PROJECTS[@]}"; do
  2031. if ! sanitize_pn "${PROJECTS[$i]}"; then
  2032. echo "Project name ${PROJECTS[$i]} is invalid"
  2033. echo "Enter a replacement"
  2034. ask_name "project" "ADD_PROJECTS" && unset "PROJECTS[i]" && PROJECTS+=("${ADD_PROJECTS[@]}")
  2035. fi
  2036. done
  2037. # Exclude modules from --exclude
  2038. for i in "${!MODULES[@]}"; do
  2039. [[ " ${EXCLUDE_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]] && unset "MODULES[$i]"
  2040. done
  2041. # Sanitize MODULES
  2042. for i in "${!MODULES[@]}"; do
  2043. if ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${MODULES[i]}[[:space:]] ]]; then
  2044. echo "Module ${MODULES[$i]} is not available, removing"
  2045. read -r -p "Enter replacement module name: " module
  2046. ! [[ " ${ALL_MODULES[*]} " =~ [[:space:]]${module}[[:space:]] ]] || { echo "RTFM"; return 1; }
  2047. MODULES[i]="$module"
  2048. fi
  2049. unset module
  2050. done
  2051. # Sanitize wrappers
  2052. for i in "${!WRAPPERS[@]}"; do
  2053. IFS=',' read -ra args <<< "${WRAPPERS[$i]}" # load the wrapper and args
  2054. if ! [[ " ${ALL_WRAPPERS[*]} " =~ [[:space:]]${args[0]}[[:space:]] ]]; then
  2055. echo "Wrapper ${args[0]} is not available, removing"
  2056. unset "WRAPPERS[$i]"
  2057. fi
  2058. done
  2059. # If module equals install_dependencies run install_dependencies
  2060. declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
  2061. [[ ${MODULES[*]} == "install_dependencies" ]] && install_dependencies
  2062. # Loop over projects
  2063. for PROJECT in "${PROJECTS[@]}"; do
  2064. declare -gx PROJECT
  2065. declare -gx PROJECT_SCANS_DIR="$SCANS_DIR/$PROJECT"
  2066. declare -gx PROJECT_DATE="${PROJECT:0:8}" # extract the first 8 characters (e.g., "20241215")
  2067. PROJECT_NO_DATE="${PROJECT:9}" # extract the part after the date and underscore
  2068. declare -gx PROJECT_USER="${PROJECT_NO_DATE%%_*}" # strip suffix to get user (e.g., "username")
  2069. declare -gx PROJECT_PREFIX="${PROJECT_DATE}_${PROJECT_USER}"
  2070. declare -gx PROJECT_NAME="${PROJECT_NO_DATE#*_}" # Remove the username and following underscore (e.g., "nameof_project")
  2071. declare -gx STUDIES_ARCHIVE_FILE="$OUT_DIR/StudiesDataArchive.txt"
  2072. declare -gx PROJECT_RESULTS_DIR="$OUT_DIR/$PROJECT"
  2073. declare -gx EASY_OUT_DIR="$PROJECT_RESULTS_DIR/easy"
  2074. # Set the automatic project directory prefix
  2075. declare -gx STUDY_PREFIX="${DATE}_${USER}" # reversed these so easier to sort and parse by date
  2076. # Use more advanced function to set the correct results directories
  2077. # This area could be streamlined and improved with better logic
  2078. [[ -z $EASY_RESULTS_DIR ]] && handle_results_dir "$EASY_OUT_DIR" "EASY_RESULTS_DIR"
  2079. [[ -z $STUDY_RESULTS_DIR ]] && handle_results_dir "$PROJECT_RESULTS_DIR" "STUDY_RESULTS_DIR"
  2080. # [[ -z $STUDY_RESULTS_DIR ]] && declare -gx STUDY_RESULTS_DIR="$PROJECT_RESULTS_DIR/${STUDY_PREFIX}_${PROJECT_NAME}"
  2081. declare -gx GTA_OUT_DIR="$STUDY_RESULTS_DIR/gta"
  2082. declare -gx GTF_OUT_DIR="$STUDY_RESULTS_DIR/gtf"
  2083. declare -gx STUDY_CONFIG_FILE="$STUDY_RESULTS_DIR/study_config"
  2084. handle_config "$STUDY_CONFIG_FILE"
  2085. debug "Project: $PROJECT"
  2086. debug "Active modules: ${MODULES[*]}"
  2087. debug "Active wrappers and their args: ${WRAPPERS[*]}"
  2088. # Run selected modules
  2089. for m in "${MODULES[@]}"; do
  2090. if ask "Run $m module?"; then
  2091. "$m" || return 1
  2092. fi
  2093. done
  2094. # Run selected wrappers
  2095. for wrapper in "${WRAPPERS[@]}"; do
  2096. IFS=',' read -ra args <<< "$wrapper" # load the wrapper and args
  2097. if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
  2098. "${args[0]}" "${args[@]:1}" || return 1
  2099. fi
  2100. done
  2101. done
  2102. [[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
  2103. [[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
  2104. [[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
  2105. unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES PARSED_CONFIG YES
  2106. }
  2107. # During development it's better to just exit
  2108. # (Safer) main loop for automatic rerun
  2109. # if main "$@"; then
  2110. # for ((i=1; i<2; i++)); do
  2111. # main &&
  2112. # i=0
  2113. # done
  2114. # fi
  2115. main "$@"
  2116. exit $?