Globalize gtr

This commit is contained in:
2024-07-22 17:48:27 -04:00
parent be1e9869b3
commit 8bada9fce1

View File

@@ -385,20 +385,24 @@ gtf() {
process_dir="GTF/Process"
function_dir="GTF/Function"
component_dir="GTF/Component"
out_dir="REMcRdy_lm_only"
py_gtf_dcon "$process_dir" "$out_dir"
py_gtf "$process_dir"
# Perform operations in each directory in parallel
for d in "$process_dir" "$function_dir" "$component_dir"; do
rsync -a "$process_dir/REMcRdy_lm_only" "$d"/
rsync -a "$process_dir/$out_dir" "$d"/
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
pl_gtf "$d" "$out_file" & # parallelize
pl_gtf "$d" "$out_dir" & # parallelize
py_gtf_concat "$d" "$out_dir" "$out_file"
done
}
# @section Submodules
# @description Submodules provide functionality to modules and are reusable between modules
# Use a submodule when:
# Use a submodule for:
# * Calling external scripts
# * Performing repetitive tasks
# *
@@ -408,7 +412,7 @@ submodule r_join_interact
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
r_join_interact() {
debug "Running: ${FUNCNAME[0]}"
echo "Rscript JoinInteractExps3dev.R"
debug "Rscript JoinInteractExps3dev.R"
Rscript JoinInteractExps3dev.R
out_file="REMcRdy_lm_only.csv"
out_file2="Shift_only.csv"
@@ -436,7 +440,7 @@ java_extract() {
"ORF_List_Without_DAmPs.txt" 1 true true
)
echo "${java_cmd[@]}"
debug "${java_cmd[@]}"
"${java_cmd[@]}"
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
}
@@ -448,7 +452,7 @@ submodule r_add_shift_values
r_add_shift_values() {
debug "Running: ${FUNCNAME[0]}"
out_file="REMcHeatmaps/REMcWithShift.csv"
echo "Rscript AddShiftVals2.R"
debug "Rscript AddShiftVals2.R"
Rscript AddShiftVals2.R
rm -f "REMcHeatmaps/"*.pdf
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
@@ -460,10 +464,10 @@ submodule r_heat_maps_zscores
r_heat_maps_zscores() {
  # Runs REMcHeatmaps_zscores.R, then merges every PDF matching
  # REMcHeatmaps/*.pdf into one compiled PDF with pdftk.
  # Sets the (non-local) variables out_file and pdfs.
  # Returns 1, after printing a message, when the compiled PDF is missing.
  local heatmap_script="REMcHeatmaps_zscores.R"
  local pdftk_msg

  debug "Running: ${FUNCNAME[0]}"
  out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"

  echo "Rscript $heatmap_script"
  debug "Rscript $heatmap_script"
  Rscript "$heatmap_script"

  # The glob is expanded after the R script has run, so it sees whatever
  # PDFs are present in REMcHeatmaps/ at that point.
  pdfs=(REMcHeatmaps/*.pdf)
  pdftk_msg="pdftk ${pdfs[*]} output $out_file"
  echo "$pdftk_msg"
  debug "$pdftk_msg"
  pdftk "${pdfs[@]}" output "$out_file"

  if [[ ! -f $out_file ]]; then
    echo "$out_file does not exist"
    return 1
  fi
}
@@ -477,7 +481,7 @@ r_heat_maps_homology() {
source_file="REMcHeatmaps/REMcWithShift.csv"
target_file="$work_dir/REMcWithShift.csv"
out_file="$work_dir/Homology/compiledREMcHomologyHeatmaps.pdf"
echo "rsync --archive $source_file $target_file"
debug "rsync --archive $source_file $target_file"
rsync --archive "$source_file" "$target_file"
# Clean old output
@@ -499,13 +503,14 @@ r_heat_maps_homology() {
}
submodule py_gtf
# @description Perform python portion of GTF
submodule py_gtf_dcon
# @description Perform python dcon portion of GTF
# @arg $1 string Directory to process
py_gtf() {
# @arg $2 string Output directory name
py_gtf_dcon() {
debug "Running: ${FUNCNAME[0]}"
in_file="REMcRdy_lm_only.csv-finalTable.csv"
out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
out_file="$1/$2/1-0-0-finaltable.csv"
debug "$PYTHON DconJG2.py $in_file $1/"
"$PYTHON" DconJG2.py "$in_file" "$1/"
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
@@ -513,43 +518,38 @@ py_gtf() {
submodule pl_gtf
# @description Perl module for GTF
# @description Perl modules for GTF
# @arg $1 string working directory
# @arg $2 string output file
# @arg $2 string output directory name to look for txt files
pl_gtf() {
# Enters directory "$1", globs *.txt files into set2, and calls the analyze
# and terms2tsv helpers for each entry together with set1.
# NOTE(review): this span appears to contain two revisions of the function
# rendered together (repeated pushd and set1 lines, a fixed REMcRdy_lm_only
# glob next to a "$2" glob, two loop headers sharing one `done`, old and new
# helper names) - confirm which lines are live before changing any of them.
debug "Running: ${FUNCNAME[0]}" "$@"
set1="ORF_List_Without_DAmPs.txt"
pushd "$1" || return 1
pushd "$1" || return 1
set1="ORF_List_Without_DAmPs.txt"
# nullglob makes a pattern with no matches expand to nothing, so set2 ends up
# empty (and the loop body never runs) instead of holding the literal pattern.
shopt -s nullglob
set2=(REMcRdy_lm_only/*.txt)
set2=("$2"/*.txt) # glob them all
# Switch nullglob back off once the array has been built.
shopt -u nullglob
for s in "${set2[@]}"; do
debug "pl_analyze $set1 $s"
pl_analyze "$set1" "$s"
debug "pl_terms2tsv $s"
pl_terms2tsv "$s"
for s2 in "${set2[@]}"; do
debug "pl_analyze $set1 $s2"
pl_gtf_analyze "$set1" "$s2"
debug "pl_terms2tsv $s2"
pl_gtf_terms2tsv "$s2"
done
# Concat the process ontology outputs from the /REMcReady_lm_only folder
# NOTE(review): the comment above says "REMcReady_lm_only" while the commands
# below use "REMcRdy_lm_only" - confirm which spelling is intended.
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
# The `return 1` sits inside ( ... ), so it only ends that subshell; execution
# continues with popd below whether or not "$2" exists.
[[ -f $2 ]] || (echo "$2 does not exist"; return 1)
# Undo the pushd so the caller's working directory is restored.
popd || return 1
}
submodule pl_analyze
submodule pl_gtf_analyze
# @description Perl analyze submodule
# This seems weird to me because we're just overwriting the same data for all set2 members
# https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl
# Is there a reason you need a custom version and not the original from cpan?
# @arg $1 string Set 1
# @arg $2 string Set 2
pl_analyze() {
pl_gtf_analyze() {
debug "Running: ${FUNCNAME[0]}"
script="analyze_v2.pl"
an="gene_association.sgd"
out_file="gene_ontology_edit.obo"
@@ -558,22 +558,41 @@ pl_analyze() {
}
submodule pl_terms2tsv
submodule pl_gtf_terms2tsv
# @description Perl terms2tsv submodule
# Probably should be translated to shell/python
# @arg $1 string Set 2
pl_terms2tsv() {
pl_gtf_terms2tsv() {
  # Runs the terms2tsv Perl script on "$1.terms" and captures its stdout
  # in "$1.tsv". Sets the (non-local) variable `script` as a side effect.
  # The exit status is that of the Perl invocation.
  debug "Running: ${FUNCNAME[0]}"
  script="terms2tsv_v4.pl"

  local terms_in="$1.terms"
  local tsv_out="$1.tsv"

  debug "$PERL $script $terms_in > $tsv_out"
  "$PERL" "$script" "$terms_in" >"$tsv_out"
}
submodule py_gtf_concat
# @description Python concat submodule for GTF
# Concat the process ontology outputs from the $2 directory (e.g. REMcRdy_lm_only)
# Probably should be translated to bash
# @arg $1 string working directory
# @arg $2 string output directory name to look for txt files
# @arg $3 string output file
py_gtf_concat() {
  # Runs Concatenate_GTF_results.py from inside directory "$1", passing it
  # the "$2/" directory and the "$3" output file name, then verifies that
  # "$3" was produced.
  #
  # Returns 1 when "$1" cannot be entered, when "$3" does not exist after the
  # script has run, or when the directory stack cannot be restored.
  # Sets the (non-local) variable `script` as a side effect.
  debug "Running: ${FUNCNAME[0]}"
  pushd "$1" || return 1
  script="Concatenate_GTF_results.py"
  debug "$PYTHON $script $2/ $3"
  "$PYTHON" "$script" "$2/" "$3"
  if [[ ! -f $3 ]]; then
    echo "$3 does not exist"
    # A `return` inside ( ... ) only leaves the subshell, which let the
    # function fall through and report success. Restore the caller's
    # directory first, then fail for real.
    popd || return 1
    return 1
  fi
  popd || return 1
}
submodule r_compile_gtf
# @description Compile GTF in R
r_compile_gtf() {
  # Announces the command on stdout and through debug, then runs
  # CompileGTF.R with Rscript. The exit status is Rscript's.
  local announce="Rscript CompileGTF.R"

  debug "Running: ${FUNCNAME[0]}"
  echo "$announce"
  debug "$announce"
  Rscript CompileGTF.R
}