55 lines
1.4 KiB
Perl
55 lines
1.4 KiB
Perl
#!/usr/bin/env perl
|
|
use strict;
|
|
use warnings;
|
|
use diagnostics;
|
|
use File::Map qw(map_file);
|
|
|
|
# Path of the file to summarise, taken from the first command-line argument.
my $infile = shift;
die "Usage: $0 <input-file>\n" unless defined $infile;

# Map the whole file into $input (File::Map) instead of reading it line by line.
my $input;
map_file $input, $infile;

{
    # Work on an ordinary copy of the mapped contents; every pattern below is
    # matched against this one named scalar rather than an implicit $_.
    my $text = $input;

    # Identifier for the first output column: the input file name with its
    # directory part and its last two dot-suffixes removed.  If the name has
    # fewer than two suffixes the substitution does not match and the name is
    # left unchanged.
    (my $f = $infile) =~ s/(.*\/)?(.*)(\.[^\.]*){2}/$2/;

    # ORF -> gene lookup built from every "Y<word> <word>" line in the file.
    my %orfgene = ($text =~ /(Y\w+)\s+(\w+)\n/g);

    # Collected for completeness; not used by the report below.
    my @indices = ($text =~ /\Q-- \E(\d+) of \d+\Q --\E/g);

    # Parallel lists, one element per matching record in file order.  They are
    # combined purely by position, so the report assumes each record supplies
    # every field.
    my @ids      = ($text =~ /GOID\s+GO:(\d+)/g);
    my @terms    = ($text =~ /TERM\s+(.*?)\n/g);
    my @pvalues  = ($text =~ /\nCORRECTED P-VALUE\s+(\d.*?)\n/g);
    my @clusterf = ($text =~ /NUM_ANNOTATIONS\s+(\d+ of \d+)/g);
    my @bgfreq   = ($text =~ /, vs (\d+ of \d+) in the genome/g);
    my @orfs     = ($text =~ /The genes annotated to this node are:\n(.*?)\n/g);

    # Turn the ", "-separated ORF lists into ":"-separated ones.
    s/, /:/g for @orfs;

    # Translate each ORF list into the matching ":"-separated gene list.  An
    # ORF without an entry in %orfgene yields an empty name, which avoids
    # "uninitialized value" warnings while keeping the output unchanged.
    my @genes;
    for my $orf_list (@orfs) {
        my @names = map { defined $orfgene{$_} ? $orfgene{$_} : '' }
                    split /:/, $orf_list;
        push @genes, join(':', @names);
    }

    header();
    for my $i (0 .. $#ids) {
        report(
            $f,           $ids[$i],    $terms[$i], $pvalues[$i],
            $clusterf[$i], $bgfreq[$i], $orfs[$i],  $genes[$i],
        );
    }
}
|
|
|
|
# Print the tab-separated column-heading line of the report to the currently
# selected output handle.  Takes no arguments; returns the result of print.
sub header {
    my $heading_line = "REMc ID\tGO_term ID\tGO-term\tCluster frequency\tBackground frequency\tP-value\tORFs\tGenes\n";
    return print $heading_line;
}
|
|
|
|
# Rewrite a "<hits> of <total>" count as "<hits> out of <total> genes, <pct>%".
#
# An undefined value becomes the empty string, and text that does not contain
# the "<hits> of <total>" pattern is returned unchanged, so capture variables
# from an unrelated earlier match are never used.  A total of zero is reported
# as 0.0% instead of dying with a division-by-zero error.
sub _format_frequency {
    my ($raw) = @_;
    return '' unless defined $raw;

    my ($hits, $total) = $raw =~ /(\d+) of (\d+)/;
    return $raw unless defined $total;

    # Compare numerically: a total such as "00" is a true string but still zero.
    my $percent = $total > 0 ? 100 * $hits / $total : 0;
    return sprintf "%d out of %d genes, %.1f%%", $hits, $total, $percent;
}

# Print one tab-separated report row to the currently selected output handle.
#
# Positional arguments:
#   $f      - value for the first column
#   $id     - identifier column
#   $term   - term text
#   $p      - p-value
#   $cfreq  - cluster frequency, expected as "<hits> of <total>"
#   $bgfreq - background frequency, expected as "<hits> of <total>"
#   $orfs   - ORF list
#   $genes  - gene list
#
# Columns are written in the order f, id, term, cluster frequency, background
# frequency, p-value, ORFs, genes.  Both frequencies are reformatted with a
# percentage; undefined arguments are written as empty columns.  Returns the
# result of print.
sub report {
    my ($f, $id, $term, $p, $cfreq, $bgfreq, $orfs, $genes) = @_;

    $cfreq  = _format_frequency($cfreq);
    $bgfreq = _format_frequency($bgfreq);

    my @columns = map { defined $_ ? $_ : '' }
        ($f, $id, $term, $cfreq, $bgfreq, $p, $orfs, $genes);

    return print join("\t", @columns) . "\n";
}
|