From 8bada9fce1acc05990c15e6527f020f7c7a0b821 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Mon, 22 Jul 2024 17:48:27 -0400 Subject: [PATCH] Globalize gtr --- workflow/script-run-workflow | 91 ++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/workflow/script-run-workflow b/workflow/script-run-workflow index 314668cf..bed40e59 100755 --- a/workflow/script-run-workflow +++ b/workflow/script-run-workflow @@ -385,20 +385,24 @@ gtf() { process_dir="GTF/Process" function_dir="GTF/Function" component_dir="GTF/Component" + out_dir="REMcRdy_lm_only" + + py_gtf_dcon "$process_dir" "$out_dir" - py_gtf "$process_dir" # Perform operations in each directory in parallel for d in "$process_dir" "$function_dir" "$component_dir"; do - rsync -a "$process_dir/REMcRdy_lm_only" "$d"/ + rsync -a "$process_dir/$out_dir" "$d"/ out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename - pl_gtf "$d" "$out_file" & # parallelize + pl_gtf "$d" "$out_dir" & # parallelize + py_gtf_concat "$d" "$out_dir" "$out_file" done - } + + # @section Submodules # @description Submodules provide functionaility to modules and are reusable between modules -# Use a submodule when: +# Use a submodule for: # * Calling external scripts # * Performing repetitive tasks # * @@ -408,7 +412,7 @@ submodule r_join_interact # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv r_join_interact() { debug "Running: ${FUNCNAME[0]}" - echo "Rscript JoinInteractExps3dev.R" + debug "Rscript JoinInteractExps3dev.R" Rscript JoinInteractExps3dev.R out_file="REMcRdy_lm_only.csv" out_file2="Shift_only.csv" @@ -436,7 +440,7 @@ java_extract() { "ORF_List_Without_DAmPs.txt" 1 true true ) - echo "${java_cmd[@]}" + debug "${java_cmd[@]}" "${java_cmd[@]}" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } @@ -448,7 +452,7 @@ submodule r_add_shift_values r_add_shift_values() { debug "Running: ${FUNCNAME[0]}" out_file="REMcHeatmaps/REMcWithShift.csv" - echo "Rscript AddShiftVals2.R" + debug "Rscript AddShiftVals2.R" Rscript AddShiftVals2.R rm -f "REMcHeatmaps/"*.pdf [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) @@ -460,10 +464,10 @@ submodule r_heat_maps_zscores r_heat_maps_zscores() { debug "Running: ${FUNCNAME[0]}" out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf" - echo "Rscript REMcHeatmaps_zscores.R" + debug "Rscript REMcHeatmaps_zscores.R" Rscript REMcHeatmaps_zscores.R pdfs=(REMcHeatmaps/*.pdf) - echo "pdftk ${pdfs[*]} output $out_file" + debug "pdftk ${pdfs[*]} output $out_file" pdftk "${pdfs[@]}" output "$out_file" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) } @@ -477,7 +481,7 @@ r_heat_maps_homology() { source_file="REMcHeatmaps/REMcWithShift.csv" target_file="$work_dir/REMcWithShift.csv" out_file="$work_dir/Homology/compiledREMcHomologyHeatmaps.pdf" - echo "rsync --archive $source_file $target_file" + debug "rsync --archive $source_file $target_file" rsync --archive "$source_file" "$target_file" # Clean old output @@ -499,13 +503,14 @@ r_heat_maps_homology() { } -submodule py_gtf -# @description Perform python portion of GTF +submodule py_gtf_dcon +# @description Perform python dcon portion of GTF # @arg $1 string Directory to process -py_gtf() { +# @arg $2 string Output directory name +py_gtf_dcon() { debug "Running: ${FUNCNAME[0]}" in_file="REMcRdy_lm_only.csv-finalTable.csv" - out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv" + out_file="$1/$2/1-0-0-finaltable.csv" debug "$PYTHON DconJG2.py $in_file $1/" "$PYTHON" DconJG2.py "$in_file" "$1/" [[ -f $out_file ]] || (echo "$out_file does not exist"; return 1) @@ -513,43 +518,38 @@ py_gtf() { submodule pl_gtf -# @description Perl module for GTF +# @description Perl modules for GTF # @arg $1 string working directory -# @arg $2 string output file +# @arg $2 string output directory name to look for txt files pl_gtf() { debug "Running: ${FUNCNAME[0]}" "$@" - set1="ORF_List_Without_DAmPs.txt" - pushd "$1" || return 1 + pushd "$1" || return 1 + set1="ORF_List_Without_DAmPs.txt" shopt -s nullglob - set2=(REMcRdy_lm_only/*.txt) + set2=("$2"/*.txt) # glob them all shopt -u nullglob - for s in "${set2[@]}"; do - debug "pl_analyze $set1 $s" - pl_analyze "$set1" "$s" - debug "pl_terms2tsv $s" - pl_terms2tsv "$s" + for s2 in "${set2[@]}"; do + debug "pl_analyze $set1 $s2" + pl_gtf_analyze "$set1" "$s2" + debug "pl_terms2tsv $s2" + pl_gtf_terms2tsv "$s2" done - # Concat the process ontology outputs from the /REMcReady_lm_only folder - echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2" - echo "TODO: Concatenate_GTF_results.py should be translated to bash" - "$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2" - - [[ -f $2 ]] || (echo "$2 does not exist"; return 1) popd || return 1 } -submodule pl_analyze +submodule pl_gtf_analyze # @description Perl analyze submodule # This seems weird to me because we're just overwriting the same data for all set2 members # https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl # Is there a reason you need a custom version and not the original from cpan? # @arg $1 string Set 1 # @arg $2 string Set 2 -pl_analyze() { +pl_gtf_analyze() { + debug "Running: ${FUNCNAME[0]}" script="analyze_v2.pl" an="gene_association.sgd" out_file="gene_ontology_edit.obo" @@ -558,22 +558,41 @@ pl_analyze() { } -submodule pl_terms2tsv +submodule pl_gtf_terms2tsv # @description Perl terms2tsv submodule # Probably should be translated to shell/python # @arg $1 string Set 2 -pl_terms2tsv() { +pl_gtf_terms2tsv() { + debug "Running: ${FUNCNAME[0]}" script="terms2tsv_v4.pl" debug "$PERL $script $1.terms > $1.tsv" "$PERL" "$script" "$1.terms" > "$1.tsv" } +submodule py_gtf_concat +# @description Python concat submodule for GTF +# Concat the process ontology outputs from the /REMcReady_lm_only folder +# Probably should be translated to bash +# @arg $1 string working directory +# @arg $2 string output directory name to look for txt files +# @arg $3 string output file +py_gtf_concat() { + debug "Running: ${FUNCNAME[0]}" + pushd "$1" || return 1 + script="Concatenate_GTF_results.py" + debug "$PYTHON $script $2/ $3" + "$PYTHON" "$script" "$2/" "$3" + [[ -f $3 ]] || (echo "$3 does not exist"; return 1) + popd || return 1 +} + + submodule r_compile_gtf # @description Compile GTF in R r_compile_gtf() { debug "Running: ${FUNCNAME[0]}" - echo "Rscript CompileGTF.R" + debug "Rscript CompileGTF.R" Rscript CompileGTF.R }