comparison COG/bac-genomics-scripts/po2anno/po2anno.pl @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
comparison
equal deleted inserted replaced
2:97e4e3e818b6 3:e42d30da7a74
1 #!/usr/bin/perl
2
3 #######
4 # POD #
5 #######
6
7 =pod
8
9 =head1 NAME
10
11 C<po2anno.pl> - create an annotation comparison matrix from Proteinortho5 output
12
13 =head1 SYNOPSIS
14
15 C<perl po2anno.pl -i matrix.proteinortho -d genome_fasta_dir/ -l -a E<gt> annotation_comparison.tsv>
16
17 =head1 DESCRIPTION
18
19 Supplement an ortholog/paralog output matrix from a
20 L<B<Proteinortho5>|http://www.bioinf.uni-leipzig.de/Software/proteinortho/>
21 calculation with annotation information. The resulting tab-separated
22 annotation comparison matrix (ACM) is mainly intended for the
23 transfer of high quality annotations from reference genomes to
24 homologs (orthologs and co-orthologs/paralogs) in a query genome
25 (e.g. in conjunction with L<C<tbl2tab.pl>|/tbl2tab>). But of course
26 it can also be used to have a quick glance at the annotation of
27 genes present only in a couple of input genomes in comparison to the
28 others.
29
30 Annotation is retrieved from multi-FASTA files created with
31 L<C<cds_extractor.pl>|/cds_extractor>. See
32 L<C<cds_extractor.pl>|/cds_extractor> for a description of the
33 format. These files are used as input for the PO analysis and option
34 B<-d> for C<po2anno.pl>.
35
36 B<Proteinortho5> (PO) has to be run with option B<-singles> to include
37 also genes without orthologs, so-called singletons/ORFans, for each
38 genome in the PO matrix (see the
39 L<PO manual|http://www.bioinf.uni-leipzig.de/Software/proteinortho/manual.html>).
40 Additionally, option B<-selfblast> is recommended to enhance paralog
41 detection by PO.
42
43 Each orthologous group (OG) is listed in a row of the resulting ACM,
44 the first column holds the OG numbers from the PO input matrix (i.e.
45 line number minus one). The following columns specify the
46 orthologous CDS for each input genome. For each CDS the ID,
47 optionally the length in bp (option B<-l>), gene, EC number(s), and
48 product are shown depending on their presence in the CDS's
49 annotation. The ID is in most cases the locus tag (see
50 L<C<cds_extractor.pl>|/cds_extractor>). If several EC numbers exist
51 for a single CDS they're separated by ';'. If an OG includes
52 paralogs, i.e. co-orthologs from a single genome, these will be
53 printed in the following row(s) B<without> a new OG number in the
54 first column. The order of paralogous CDSs within an OG is
55 arbitrarily.
56
57 The OGs are sorted numerically via the query ID (see option B<-q>).
58 If option B<-a> is set, the non-query OGs are appended to the output
59 after the query OGs, sorted numerically via OG number.
60
61 =head1 OPTIONS
62
63 =head2 Mandatory options
64
65 =over 20
66
67 =item B<-i>=I<str>, B<-input>=I<str>
68
69 Proteinortho (PO) result matrix (*.proteinortho or *.poff), or piped C<STDIN> (-)
70
71 =item B<-d>=I<str>, B<-dir_genome>=I<str>
72
73 Path to the directory including the genome multi-FASTA PO input
74 files (*.faa or *.ffn), created with L<C<cds_extractor.pl>|/cds_extractor>
75
76 =back
77
78 =head2 Optional options
79
80 =over 20
81
82 =item B<-h>, B<-help>
83
84 Help (perldoc POD)
85
86 =item B<-q>=I<str>, B<-query>=I<str>
87
88 Query genome (has to be identical to the string in the PO matrix)
89 [default = first one in alphabetical order]
90
91 =item B<-l>, B<-length>
92
93 Include length of each CDS in bp
94
95 =item B<-a>, B<-all>
96
97 Append non-query orthologous groups (OGs) to the output
98
99 =item B<-v>, B<-version>
100
101 Print version number to C<STDERR>
102
103 =back
104
105 =head1 OUTPUT
106
107 =over 20
108
109 =item C<STDOUT>
110
111 The resulting tab-delimited ACM is printed to C<STDOUT>. Redirect or
112 pipe into another tool as needed (e.g. C<cut>, C<grep>, C<head>, or
113 C<tail>).
114
115 =back
116
117 =head1 EXAMPLES
118
119 =head2 L<C<cds_extractor.pl>|/cds_extractor>
120
121 =over
122
123 =item C<for i in *.[gbk|embl]; do perl cds_extractor.pl -i $i [-p|-n]; done>
124
125 =back
126
127 =head2 L<B<Proteinortho5>|http://www.bioinf.uni-leipzig.de/Software/proteinortho/>
128
129 =over
130
131 =item C<proteinortho5.pl -graph [-synteny] -cpus=# -selfblast -singles -identity=50 -cov=50 -blastParameters='-use_sw_tback [-seg no|-dust no]' *.[faa|ffn]>
132
133 =back
134
135 =head2 C<po2anno.pl>
136
137 =over
138
139 =item C<perl po2anno.pl -i matrix.[proteinortho|poff] -d genome_fasta_dir/ -q query.[faa|ffn] -l -a E<gt> annotation_comparison.tsv>
140
141 =back
142
143 =head1 VERSION
144
145 0.2.2 update: 23-10-2015
146 0.1 18-12-2014
147
148 =head1 AUTHOR
149
150 Andreas Leimbach aleimba[at]gmx[dot]de
151
152 =head1 LICENSE
153
154 This program is free software: you can redistribute it and/or modify
155 it under the terms of the GNU General Public License as published by
156 the Free Software Foundation; either version 3 (GPLv3) of the
157 License, or (at your option) any later version.
158
159 This program is distributed in the hope that it will be useful, but
160 WITHOUT ANY WARRANTY; without even the implied warranty of
161 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
162 General Public License for more details.
163
164 You should have received a copy of the GNU General Public License
165 along with this program. If not, see L<http://www.gnu.org/licenses/>.
166
167 =cut
168
169
170 ########
171 # MAIN #
172 ########
173
174 use strict;
175 use warnings;
176 use autodie;
177 use Getopt::Long;
178 use Pod::Usage;
179
180 my $Cmdline = "$0 @ARGV"; # used call command
181
182 ### Get the options with Getopt::Long
183 my $PO_Matrix_File; # PO result matrix (*.proteinortho or *.poff)
184 my $Genome_Dir; # directory with genome multi-FASTAs (PO input)
185 my $Query; # query genome (first column in output)
186 my $Opt_Length; # include CDS nucleotide lengths in output
187 my $Opt_All_OGs; # print also all non-query OGs to output
188 my $VERSION = '0.2.2';
189 my ($Opt_Version, $Opt_Help);
190 GetOptions ('input=s' => \$PO_Matrix_File,
191 'dir_genome=s' => \$Genome_Dir,
192 'query=s' => \$Query,
193 'length' => \$Opt_Length,
194 'all' => \$Opt_All_OGs,
195 'version' => \$Opt_Version,
196 'help|?' => \$Opt_Help);
197
198
199
200 ### Run perldoc on POD
201 pod2usage(-verbose => 2) if ($Opt_Help);
202 die "$0 $VERSION\n" if ($Opt_Version);
203
204 if (!$PO_Matrix_File || !$Genome_Dir) {
205 my $warning = "\n### Fatal error: Mandatory options '-i' or '-d' or their arguments are missing!\n";
206 pod2usage(-verbose => 1, -message => $warning, -exitval => 2);
207 }
208 die "\n### Fatal error: Proteinortho matrix file '$PO_Matrix_File' does not exist: $!\n" if (!-e $PO_Matrix_File);
209 $Genome_Dir =~ s/\/$//; # get rid of a potential '/' at the end of $Genome_Dir path
210 die "\n### Fatal error: Genome directory '$Genome_Dir' does not exist: $!\n" if (!-d $Genome_Dir);
211
212 print STDERR "Script call command: $Cmdline\n"; # print call command after '-h|-v'
213
214
215
216 ### Pipe input from STDIN or open input file
217 my $PO_Matrix_Fh;
218 if ($PO_Matrix_File eq '-') { # file input via STDIN
219 $PO_Matrix_Fh = *STDIN; # capture typeglob of STDIN
220 } else { # input via input file
221 open ($PO_Matrix_Fh, "<", "$PO_Matrix_File");
222 }
223
224
225
226 ### Parse OGs in input PO matrix
227 print STDERR "Parsing Proteinortho input matrix '$PO_Matrix_File' ...\n"; # run status of script
228 my @Genome_Files; # store genome filename column order
229 my %Ortho_Groups; # hash of hash with OG# as key, internal hash with genome filename as key and IDs (e.g. locus tags) in anonymous array
230 while (<$PO_Matrix_Fh>) {
231 chomp;
232
233 # check PO input file header and get genome file names
234 if ($. == 1) { # header of PO matrix file (first line)
235 die "\n### Fatal error:\nProteinortho input matrix '$PO_Matrix_File' does not have the mandatory header line, which starts with the first three tab-separated mandatory columns:\n# Species\tGenes\tAlg.-Conn.\n" if (!/^# Species\tGenes\tAlg\.-Conn\./);
236
237 # get input genomes from PO matrix and check $Query existence
238 my $query_present = 0 if ($Query);
239 foreach (split(/\t/, $_)) {
240 next if (/# Species|Genes|Alg\.-Conn\./); # non-genome header columns
241 $query_present = 1 if ($Query && $Query eq $_);
242 push (@Genome_Files, $_); # store genome in array
243 }
244 die "\n### Fatal error:\nGiven query '$Query' not found in the header of the input Proteinortho matrix '$PO_Matrix_File'! Make sure the string corresponds exactly (also lower/uppercase) to the header of the matrix and the input file in '$Genome_Dir'!\n" if ($Query && !$query_present);
245 next; # skip header line of PO matrix for subsequent code
246 }
247
248 # parse PO ortholog matrix
249 my ($organisms, $genes, $alg_conn, @og) = split(/\t/); # $alg_conn not used
250 my $organism_count = 0; # count number of organisms in OG to compare to '$organisms' (should be ok from PO)
251 my $gene_count = 0; # count number of genes in OG to compare to '$genes' (should be ok from PO)
252 my $column = -1; # column counter to associate column to correct genome file in OG (@Genome_Files array zero-based, thus start with '-1')
253 foreach (@og) {
254 $column++;
255 next if (/^\*$/); # skip empty columns in PO matrix (indicated by '*')
256 $organism_count++;
257 my @paralogs = split(',');
258 foreach (sort {$a cmp $b} @paralogs) {
259 $gene_count++;
260 push(@{ $Ortho_Groups{$.-1}->{$Genome_Files[$column]} }, $_); # push into anonymous array in hash of hash ($.-1 = OG number, because of header)
261 }
262 }
263 die "\n### Fatal error:\nThe indicated number of species ($organisms) does not fit to the counted number of organisms ($organism_count) in the orthologous group of line '$.' of the Proteinortho input matrix '$PO_Matrix_File'!\n" if ($organism_count != $organisms); # to check PO matrix
264 die "\n### Fatal error:\nThe indicated number of genes ($genes) does not fit to the counted number of orthologs/paralogs ($gene_count) in the orthologous group of line '$.' of the Proteinortho input matrix '$PO_Matrix_File'!\n" if ($gene_count != $genes); # to check PO matrix
265 }
266 close $PO_Matrix_Fh;
267
268
269
270 ### Sort @Genome_Files and set $Query as first element (not possible above because has to follow $column!)
271 @Genome_Files = sort {lc($a) cmp lc($b)} @Genome_Files; # sort regardless of upper/lowercase
272 if ($Query) {
273 @Genome_Files = grep ($_ ne $Query, @Genome_Files); # remove $Query from @Genome_Files
274 unshift (@Genome_Files, $Query); # set as first element
275 } elsif (!$Query) {
276 $Query = $Genome_Files[0]; # $Query first element in sorted array, when not given with option '-q'
277 }
278
279
280
281 ### Parse annotations in genome multi-FASTAs
282 print STDERR "Parsing annotation from multi-FASTA CDS genome files in directory '$Genome_Dir' ...\n";
283 my %Annotation; # hash of hash to store the annotation of the genome files
284 my %Anno_Features; # hash of hash to store which annotation features are present overall in each individual genome multi-FASTA (optional are 'gene/g=', 'product/p=' and 'EC_number/ec=' tags)
285 foreach my $genome (@Genome_Files) {
286 my $genome_file_path = "$Genome_Dir/$genome";
287 check_file_exist($genome_file_path); # subroutine
288 open (my $genome_fh, "<", "$genome_file_path");
289
290 # parse anno in current genome file
291 while (my $anno_line = <$genome_fh>) {
292 chomp $anno_line;
293 die "\n### Fatal error:\n'$genome_file_path' is not a FASTA input file. First line of the file should be a FASTA ID/header line and start with a '>':\n$anno_line\n" if ($anno_line !~ /^>/ && $. == 1);
294 next if ($anno_line !~ /^>/); # skip non-ID FASTA lines
295
296 my ($id, $gene, $product, $length, $organism, $ec); # $organism not used
297 foreach (split(/\s/, $anno_line)) {
298 if (/^>(.+)$/) {
299 $id = $1;
300 } elsif (/g=(.*)$/) {
301 $gene = $1;
302 } elsif (/p=(.*)$/) {
303 warn "\n### Warning:\nNo product annotation (p=) in file '$genome_file_path' on line:\n$anno_line\nProceeding ...\n" if (!$1);
304 $product = $1;
305 $product =~ s/\_/ /g;
306 } elsif (/l=(.*)$/) {
307 my ($start, $stop) = split(/\.\./, $1);
308 $length = abs($stop - $start) + 1;
309 } elsif (/o=(.*)$/) { # $organism not used
310 $organism = $1;
311 $organism =~ s/\_/ /g;
312 } elsif (/ec=(.*)$/) {
313 $ec = $1;
314 } else {
315 die "\n### Fatal error:\nAnnotation in file '$genome_file_path' is not recognized on the following line. Please use 'cds_extractor.pl' to create your multi-FASTA protein genome files.\n$anno_line\n";
316 }
317 }
318 die "\n### Fatal error:\nThe following FASTA ID of file '$genome_file_path' is not unique but has to be considering ALL input genome files (actually Proteinortho should have complained already). Please modify all repetitive occurences.\n$id\n" if ($Annotation{$id});
319
320 # fill annotation hash of hash
321 $Annotation{$id} = {'genome' => $genome,
322 'gene' => $gene,
323 'product' => $product,
324 'length' => $length,
325 'ec' => $ec};
326
327 # store which 'optional' annotation features are present overall in a genome
328 # (used to have lots of greps on %Annotation for this purpose below, but lets save performance)
329 $Anno_Features{$genome}{'gene'} = 1 if ($gene && !$Anno_Features{$genome}->{'gene'});
330 $Anno_Features{$genome}{'product'} = 1 if ($product && !$Anno_Features{$genome}->{'product'});
331 $Anno_Features{$genome}{'ec'} = 1 if ($ec && !$Anno_Features{$genome}->{'ec'});
332 }
333 close $genome_fh;
334 }
335
336
337
338 ### Check if CDS counts in multi-FASTA genome files and PO matrix are equal and print header of output
339 print STDERR "Comparing annotation CDS counts to orthologous group CDS counts and printing header for output matrix ...\n";
340 my $Query_CDS_Count; # store number of query CDS for stat report at end
341 print "# OG"; # first column of header
342 foreach my $genome (@Genome_Files) {
343 my $ortho_cds_count = map ($Ortho_Groups{$_}->{$genome} ? @{ $Ortho_Groups{$_}->{$genome} } : (), keys %Ortho_Groups); # de-reference anonymous array in hash of hash
344 # map evaluates BLOCK or EXPR in list context and returns the LIST value composed of the results of each such evaluation (each element of LIST may produce zero, one, or more elements in the returned value <=> grep returns only one value). In scalar context, returns the total number of elements so generated.
345
346 # the map line above is a fancy way of writing
347 #foreach (keys %Ortho_Groups) {
348 #$ortho_cds_count += @{ $Ortho_Groups{$_}->{$genome} } if ($Ortho_Groups{$_}->{$genome});
349 #}
350
351 my $anno_cds_count = grep ($Annotation{$_}->{'genome'} eq $genome, keys %Annotation);
352 $Query_CDS_Count = $anno_cds_count if ($Query eq $genome);
353 die "\n### Fatal error:\nThere are more CDSs in file '$genome' than for this genome in the Proteinortho matrix '$PO_Matrix_File', but counts have to be equal. Please run Proteinortho5 with option '-singles' to include also genes without orthologs, so-called singletons/ORFans (recommended is also option '-selfblast' to enhance paralog detection).\n" if ($ortho_cds_count < $anno_cds_count);
354 die "\n### Fatal error:\nThere are less CDSs in file '$genome' than for this genome in the Proteinortho matrix '$PO_Matrix_File', but counts have to be equal. Please check if the files are correct.\n" if ($ortho_cds_count > $anno_cds_count); # overlaps with ID-specific check in sub 'print_matrix'
355
356 # print header fields for output
357 print "\t$genome";
358 print "\tlength [bp]" if ($Opt_Length);
359 print "\tgene" if ($Anno_Features{$genome}->{'gene'});
360 print "\tec" if ($Anno_Features{$genome}->{'ec'});
361 print "\tproduct" if ($Anno_Features{$genome}->{'product'}); # very unlikely that there's a whole genome without product annotation ...
362 }
363 print "\n";
364
365
366
367 ### Print annotation comparison matrix
368 print STDERR "Printing output annotation comparison matrix for query OGs ...\n";
369 my %Query_ID_Seen; # query IDs already processed for output (because of paralogous CDSs in the same OGs)
370 my %Query_OGs; # OGs containing query CDSs, to skip for non-query OGs (option '-a') below
371 my $Query_Specific_OGs = 0; # count number of OGs including only query CDSs (not including CDSs of the other genomes)
372 my $Query_Singletons = 0; # count query singletons/ORFans (different to '$Query_Singleton_OGs' because of paralogous CDSs)
373 foreach my $id (sort { $a cmp $b } grep ($Annotation{$_}->{'genome'} eq $Query, keys %Annotation)) { # get IDs only for the query genome, sorted
374 next if ($Query_ID_Seen{$id});
375
376 # get OG for current query $id (I'm sure this can also be resolved by a fancy map/grep construction, but I didn't get it to work)
377 my $id_og; # OG containing the current query ID
378 foreach my $og (keys %Ortho_Groups) {
379 foreach (@{ $Ortho_Groups{$og}->{$Query} }) {
380 if ($_ eq $id) {
381 $id_og = $og;
382 last;
383 }
384 }
385 last if ($id_og);
386 }
387 $Query_OGs{$id_og} = 1;
388
389 print_matrix($id_og, \%Query_ID_Seen); # subroutine to print the annotation comparison matrix for the current OG to STDOUT
390 }
391
392
393
394 ### Optionally, print also non-query OGs/singletons
395 if ($Opt_All_OGs) {
396 print STDERR "Printing output annotation comparison matrix for non-query OGs (option '-all') ...\n";
397 foreach my $og (sort {$a <=> $b} keys %Ortho_Groups) { # sort from smallest to largest OG
398 next if ($Query_OGs{$og});
399 print_matrix($og); # subroutine
400 }
401 }
402
403
404
405 ### Print some final stats
406 print STDERR "\nFinished creating Proteinortho annotation comparison matrix with query genome '$Query':\n";
407 print STDERR "Total genomes: ", scalar @Genome_Files, "\n";
408 print STDERR "Total CDSs: ", scalar keys %Annotation, "\n";
409 print STDERR "Total query CDSs: $Query_CDS_Count\n";
410 print STDERR "Total OGs: ", scalar keys %Ortho_Groups, "\n";
411 print STDERR "OGs including query CDSs: ", scalar keys %Query_OGs, "\n"; # 'scalar grep($Ortho_Groups{$_}->{$Query} , keys %Ortho_Groups)' should actually be the same, but somehow doesn't work ... (what about the fancy map in the CDS count check above?)
412 print STDERR "Query-specific OGs (not including CDSs of the other genomes): $Query_Specific_OGs\n";
413 print STDERR "Total query singletons/ORFans: $Query_Singletons\n";
414
415
416 exit;
417
418
419 #############
420 #Subroutines#
421 #############
422
423 ### Subroutine to test for file existence
424 sub check_file_exist {
425 my $file = shift;
426 die "\n### Fatal error:\nGenome file '$file' is listed in the Proteinortho matrix, but does not exist in directory '$Genome_Dir': $!\n" if (!-e $file);
427 return 1;
428 }
429
430
431
432 ### Subroutine to print resulting annotation comparison matrix to STDOUT
433 sub print_matrix {
434 my ($og, $query_id_seen) = @_; # $query_id_seen hash-ref to %Query_ID_Seen ($query_id_seen not given/defined if called above for non-query OGs, option '-a')
435
436 # get max count of paralogs/co-orthologs over all genomes in current OG for row printing below
437 my $max_paralog_count = 0;
438 foreach my $genome (keys %{$Ortho_Groups{$og}}) {
439 my $paralog_count = 0;
440 foreach (@{ $Ortho_Groups{$og}->{$genome} }) { # only needed for genomes in the CURRENT $og <=> ALL genomes needed below
441 $query_id_seen->{$_} = 1 if ($query_id_seen && $genome eq $Query); # store query IDs that have been processed
442 $paralog_count++;
443 }
444 if ($genome eq $Query && keys %{$Ortho_Groups{$og}} == 1) { # query-specific OGs (not including CDSs of the other genomes)
445 $Query_Specific_OGs++;
446 $Query_Singletons += $paralog_count;
447 }
448 $max_paralog_count = $paralog_count if ($paralog_count > $max_paralog_count);
449 }
450
451 # print output matrix for current OG
452 print "$og";
453 for (my $i = 0; $i < $max_paralog_count; $i++) { # '<' because array zero-based
454 print "\t"; # tab after OG-nr
455
456 foreach my $genome (@Genome_Files) { # for ALL genomes
457
458 # CDS(s) of genome present in current $og and within $max_paralog_count
459 if ($Ortho_Groups{$og}->{$genome}->[$i]) {
460 die "\n### Fatal error:\nID '$Ortho_Groups{$og}->{$genome}->[$i]' present in the Proteinortho matrix '$PO_Matrix_File' in OG '$og' but not present in the corresponding genome '$genome' multi-FASTA file. However, all IDs in the matrix and the genome files in directory '$Genome_Dir' need to be correspondent to each other. Make sure you chose the correct directory with the input genome files for the current Proteinortho matrix!\n" unless ($Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}); # overlaps with check during the CDS count comparison of FASTA files and PO matrix (but here not all IDs are controlled without option '-a')
461
462 print "$Ortho_Groups{$og}->{$genome}->[$i]\t"; # print CDS ID
463 print $Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'length'}, "\t" if ($Opt_Length);
464 if ($Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'gene'}) {
465 print $Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'gene'}, "\t";
466 } elsif ($Anno_Features{$genome}->{'gene'}) { # tab only if this genome has a 'g=/gene' annotation at all (stored in hash of hash %Anno_Features)
467 print "\t";
468 }
469 if ($Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'ec'}) {
470 print $Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'ec'}, "\t";
471 } elsif ($Anno_Features{$genome}->{'ec'}) {
472 print "\t";
473 }
474 if ($Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'product'}) {
475 print $Annotation{$Ortho_Groups{$og}->{$genome}->[$i]}->{'product'}, "\t";
476 } elsif ($Anno_Features{$genome}->{'product'}) {
477 warn "\n### Warning:\nNo product annotation (p=) in file '$genome' for ID:\n$Ortho_Groups{$og}->{$genome}->[$i]\nProceeding ...\n"; # overlaps with 'warn' for missing product annotation in parsing the annotations of the genomes
478 print "\t";
479 }
480
481 } else { # genome without a CDS in current $og or less paralogous CDSs than $max_paralog_count
482 my $times = 5;
483 $times -= 1 if (!$Opt_Length);
484 $times -= 1 if (!$Anno_Features{$genome}->{'gene'}); # genome doesn't have a 'g=/gene' annotation at all
485 $times -= 1 if (!$Anno_Features{$genome}->{'ec'});
486 $times -= 1 if (!$Anno_Features{$genome}->{'product'}); # very unlikely that there's a whole genome without product annotation ...
487 print "\t" x $times;
488 }
489 }
490 print "\n";
491 }
492
493 return 1;
494 }
495