Globalize gtr
This commit is contained in:
@@ -385,20 +385,24 @@ gtf() {
|
||||
process_dir="GTF/Process"
|
||||
function_dir="GTF/Function"
|
||||
component_dir="GTF/Component"
|
||||
out_dir="REMcRdy_lm_only"
|
||||
|
||||
py_gtf_dcon "$process_dir" "$out_dir"
|
||||
|
||||
py_gtf "$process_dir"
|
||||
|
||||
# Perform operations in each directory in parallel
|
||||
for d in "$process_dir" "$function_dir" "$component_dir"; do
|
||||
rsync -a "$process_dir/REMcRdy_lm_only" "$d"/
|
||||
rsync -a "$process_dir/$out_dir" "$d"/
|
||||
out_file="${d##*/}Results.txt" # Use the dirname to create each Results filename
|
||||
pl_gtf "$d" "$out_file" & # parallelize
|
||||
pl_gtf "$d" "$out_dir" & # parallelize
|
||||
py_gtf_concat "$d" "$out_dir" "$out_file"
|
||||
done
|
||||
|
||||
}
|
||||
|
||||
|
||||
# @section Submodules
|
||||
# @description Submodules provide functionality to modules and are reusable between modules
|
||||
# Use a submodule when:
|
||||
# Use a submodule for:
|
||||
# * Calling external scripts
|
||||
# * Performing repetitive tasks
|
||||
# *
|
||||
@@ -408,7 +412,7 @@ submodule r_join_interact
|
||||
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||
r_join_interact() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
echo "Rscript JoinInteractExps3dev.R"
|
||||
debug "Rscript JoinInteractExps3dev.R"
|
||||
Rscript JoinInteractExps3dev.R
|
||||
out_file="REMcRdy_lm_only.csv"
|
||||
out_file2="Shift_only.csv"
|
||||
@@ -436,7 +440,7 @@ java_extract() {
|
||||
"ORF_List_Without_DAmPs.txt" 1 true true
|
||||
)
|
||||
|
||||
echo "${java_cmd[@]}"
|
||||
debug "${java_cmd[@]}"
|
||||
"${java_cmd[@]}"
|
||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||
}
|
||||
@@ -448,7 +452,7 @@ submodule r_add_shift_values
|
||||
r_add_shift_values() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
out_file="REMcHeatmaps/REMcWithShift.csv"
|
||||
echo "Rscript AddShiftVals2.R"
|
||||
debug "Rscript AddShiftVals2.R"
|
||||
Rscript AddShiftVals2.R
|
||||
rm -f "REMcHeatmaps/"*.pdf
|
||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||
@@ -460,10 +464,10 @@ submodule r_heat_maps_zscores
|
||||
r_heat_maps_zscores() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
out_file="REMcHeatmaps/compiledREMcHeatmaps.pdf"
|
||||
echo "Rscript REMcHeatmaps_zscores.R"
|
||||
debug "Rscript REMcHeatmaps_zscores.R"
|
||||
Rscript REMcHeatmaps_zscores.R
|
||||
pdfs=(REMcHeatmaps/*.pdf)
|
||||
echo "pdftk ${pdfs[*]} output $out_file"
|
||||
debug "pdftk ${pdfs[*]} output $out_file"
|
||||
pdftk "${pdfs[@]}" output "$out_file"
|
||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||
}
|
||||
@@ -477,7 +481,7 @@ r_heat_maps_homology() {
|
||||
source_file="REMcHeatmaps/REMcWithShift.csv"
|
||||
target_file="$work_dir/REMcWithShift.csv"
|
||||
out_file="$work_dir/Homology/compiledREMcHomologyHeatmaps.pdf"
|
||||
echo "rsync --archive $source_file $target_file"
|
||||
debug "rsync --archive $source_file $target_file"
|
||||
rsync --archive "$source_file" "$target_file"
|
||||
|
||||
# Clean old output
|
||||
@@ -499,13 +503,14 @@ r_heat_maps_homology() {
|
||||
}
|
||||
|
||||
|
||||
submodule py_gtf
|
||||
# @description Perform python portion of GTF
|
||||
submodule py_gtf_dcon
|
||||
# @description Perform python dcon portion of GTF
|
||||
# @arg $1 string Directory to process
|
||||
py_gtf() {
|
||||
# @arg $2 string Output directory name
|
||||
py_gtf_dcon() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
in_file="REMcRdy_lm_only.csv-finalTable.csv"
|
||||
out_file="$1/REMcRdy_lm_only/1-0-0-finaltable.csv"
|
||||
out_file="$1/$2/1-0-0-finaltable.csv"
|
||||
debug "$PYTHON DconJG2.py $in_file $1/"
|
||||
"$PYTHON" DconJG2.py "$in_file" "$1/"
|
||||
[[ -f $out_file ]] || (echo "$out_file does not exist"; return 1)
|
||||
@@ -513,43 +518,38 @@ py_gtf() {
|
||||
|
||||
|
||||
submodule pl_gtf
|
||||
# @description Perl module for GTF
|
||||
# @description Perl modules for GTF
|
||||
# @arg $1 string working directory
|
||||
# @arg $2 string output file
|
||||
# @arg $2 string output directory name to look for txt files
|
||||
pl_gtf() {
|
||||
debug "Running: ${FUNCNAME[0]}" "$@"
|
||||
set1="ORF_List_Without_DAmPs.txt"
|
||||
pushd "$1" || return 1
|
||||
|
||||
pushd "$1" || return 1
|
||||
set1="ORF_List_Without_DAmPs.txt"
|
||||
shopt -s nullglob
|
||||
set2=(REMcRdy_lm_only/*.txt)
|
||||
set2=("$2"/*.txt) # glob them all
|
||||
shopt -u nullglob
|
||||
|
||||
for s in "${set2[@]}"; do
|
||||
debug "pl_analyze $set1 $s"
|
||||
pl_analyze "$set1" "$s"
|
||||
debug "pl_terms2tsv $s"
|
||||
pl_terms2tsv "$s"
|
||||
for s2 in "${set2[@]}"; do
|
||||
debug "pl_analyze $set1 $s2"
|
||||
pl_gtf_analyze "$set1" "$s2"
|
||||
debug "pl_terms2tsv $s2"
|
||||
pl_gtf_terms2tsv "$s2"
|
||||
done
|
||||
|
||||
# Concat the process ontology outputs from the /REMcReady_lm_only folder
|
||||
echo "$PYTHON Concatenate_GTF_results.py REMcRdy_lm_only/ $2"
|
||||
echo "TODO: Concatenate_GTF_results.py should be translated to bash"
|
||||
"$PYTHON" Concatenate_GTF_results.py REMcRdy_lm_only/ "$2"
|
||||
|
||||
[[ -f $2 ]] || (echo "$2 does not exist"; return 1)
|
||||
popd || return 1
|
||||
}
|
||||
|
||||
|
||||
submodule pl_analyze
|
||||
submodule pl_gtf_analyze
|
||||
# @description Perl analyze submodule
|
||||
# This seems weird to me because we're just overwriting the same data for all set2 members
|
||||
# https://metacpan.org/dist/GO-TermFinder/view/examples/analyze.pl
|
||||
# Is there a reason you need a custom version and not the original from cpan?
|
||||
# @arg $1 string Set 1
|
||||
# @arg $2 string Set 2
|
||||
pl_analyze() {
|
||||
pl_gtf_analyze() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
script="analyze_v2.pl"
|
||||
an="gene_association.sgd"
|
||||
out_file="gene_ontology_edit.obo"
|
||||
@@ -558,22 +558,41 @@ pl_analyze() {
|
||||
}
|
||||
|
||||
|
||||
submodule pl_terms2tsv
|
||||
submodule pl_gtf_terms2tsv
|
||||
# @description Perl terms2tsv submodule
|
||||
# Probably should be translated to shell/python
|
||||
# @arg $1 string Set 2
|
||||
pl_terms2tsv() {
|
||||
pl_gtf_terms2tsv() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
script="terms2tsv_v4.pl"
|
||||
debug "$PERL $script $1.terms > $1.tsv"
|
||||
"$PERL" "$script" "$1.terms" > "$1.tsv"
|
||||
}
|
||||
|
||||
|
||||
submodule py_gtf_concat
|
||||
# @description Python concat submodule for GTF
|
||||
# Concat the process ontology outputs from the /REMcRdy_lm_only folder
|
||||
# Probably should be translated to bash
|
||||
# @arg $1 string working directory
|
||||
# @arg $2 string output directory name to look for txt files
|
||||
# @arg $3 string output file
|
||||
py_gtf_concat() {
  # Run the Python concatenation script inside working directory $1, reading
  # from the directory named by $2 and writing to the file named by $3.
  #
  # Arguments:
  #   $1 - working directory to change into for the duration of the call
  #   $2 - name of the directory (relative to $1) passed to the script
  #   $3 - output file (relative to $1) the script is expected to create
  # Globals:
  #   PYTHON (read)  - interpreter used to run the script
  #   script (written) - left global, like the other submodules in this file
  # Returns:
  #   0 if $3 exists after the script ran; 1 if the directory change failed
  #   or the output file is missing.
  debug "Running: ${FUNCNAME[0]}"
  pushd "$1" || return 1
  script="Concatenate_GTF_results.py"
  debug "$PYTHON $script $2/ $3"
  "$PYTHON" "$script" "$2/" "$3"

  # The previous form `[[ -f $3 ]] || (echo ...; return 1)` ran `return`
  # inside a subshell, so the function carried on and returned 0 even when
  # the output file was missing. Record the failure in the current shell,
  # restore the directory stack, then report it.
  local rc=0
  if [[ ! -f $3 ]]; then
    echo "$3 does not exist"
    rc=1
  fi
  popd || return 1
  return "$rc"
}
|
||||
|
||||
|
||||
submodule r_compile_gtf
|
||||
# @description Compile GTF in R
|
||||
r_compile_gtf() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
echo "Rscript CompileGTF.R"
|
||||
debug "Rscript CompileGTF.R"
|
||||
Rscript CompileGTF.R
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user