comparison Roary/lib/Bio/Roary/GeneNamesFromGFF.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c47a5f61bc9f
1 package Bio::Roary::GeneNamesFromGFF;
2
3 # ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name
4
5 =head1 SYNOPSIS
6
7 Parse a GFF and efficiently extract ID -> Gene Name
8 use Bio::Roary::GeneNamesFromGFF;
9
10 my $obj = Bio::Roary::GeneNamesFromGFF->new(
11 gff_file => 'abc.gff'
12 );
13 $obj->ids_to_gene_name;
14
15 =cut
16
17 use Moose;
18
19 use Bio::Tools::GFF;
20 with 'Bio::Roary::ParseGFFAnnotationRole';
21
# Feature ID => gene name. Built lazily, on first access, by
# _build_ids_to_gene_name, which parses the GFF file.
has 'ids_to_gene_name' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_ids_to_gene_name',
);

# Feature ID => value of the feature's 'product' tag.
# Starts out empty; it is filled as a side effect of building
# ids_to_gene_name, so read that attribute first.
has 'ids_to_product' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);

# Feature ID => (end - start) of the feature.
# Starts out empty; like ids_to_product it is only filled while
# ids_to_gene_name is being built.
has 'ids_to_gene_size' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);
25
26 # Parsing with the perl GFF module is exceptionally slow.
# Builder for the lazy 'ids_to_gene_name' attribute.
#
# Reads every feature from $self->gff_file (parsed as GFF version 3) and
# returns a hash reference mapping feature ID => gene name.
#
# Gene name selection: the 'gene' tag is used when the feature has one,
# otherwise the 'Name' tag. Double quotes are stripped from the value and an
# empty result is not recorded. A feature that has a 'gene' tag never falls
# back to 'Name', even when the 'gene' value turns out to be empty.
#
# Side effects: for every feature with a usable ID this also stores
#   - the first 'product' tag value in $self->ids_to_product, and
#   - (end - start) in $self->ids_to_gene_size.
#
# Features for which _get_feature_id yields no ID are skipped entirely.
#
# NOTE(review): gff_file is not declared in this file; presumably it is
# supplied by the consumed role - confirm.
sub _build_ids_to_gene_name {
    my ($self) = @_;

    my %id_to_gene_name;

    # The accessors return the same hash reference on every call, so fetch
    # them once instead of once per feature.
    my $product_for = $self->ids_to_product;
    my $size_for    = $self->ids_to_gene_size;

    my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );

  FEATURE:
    while ( my $feature = $gffio->next_feature() ) {
        my $gene_id = $self->_get_feature_id($feature);

        # Test for "no ID" explicitly: a plain truthiness test would also
        # discard the perfectly valid ID "0".
        next FEATURE unless defined $gene_id && $gene_id ne "";

      NAME_TAG:
        for my $name_tag (qw(gene Name)) {
            next NAME_TAG unless $feature->has_tag($name_tag);

            # Only the first value of the tag is of interest.
            my ($gene_name) = $feature->get_tag_values($name_tag);
            if ( defined $gene_name ) {
                $gene_name =~ s!"!!g;
                $id_to_gene_name{$gene_id} = $gene_name if $gene_name ne "";
            }

            # The first tag that is present decides; do not fall through to
            # the next candidate even if this one produced no name.
            last NAME_TAG;
        }

        if ( $feature->has_tag('product') ) {
            my ($product) = $feature->get_tag_values('product');
            $product_for->{$gene_id} = $product;
        }

        # NOTE(review): with inclusive start/end coordinates this is one less
        # than the number of bases covered - confirm what consumers expect
        # before changing it.
        $size_for->{$gene_id} = $feature->end - $feature->start;
    }

    return \%id_to_gene_name;
}
60
# Work out the identifier of a GFF feature.
#
# Parameters:
#   $feature - object offering has_tag($tag) and get_tag_values($tag).
#
# Returns the first value of the 'ID' tag when the feature has that tag,
# otherwise the first value of the 'locus_tag' tag, with all single and
# double quotes removed. Returns undef when neither tag is present, when the
# chosen tag carries no value, or when nothing is left after removing quotes.
# A feature that has an 'ID' tag is never looked up by 'locus_tag', even if
# its 'ID' turns out to be unusable.
sub _get_feature_id {
    my ( $self, $feature ) = @_;

    # 'ID' takes precedence; stop at the first tag that is present.
    my $id_tag;
    for my $candidate (qw(ID locus_tag)) {
        if ( $feature->has_tag($candidate) ) {
            $id_tag = $candidate;
            last;
        }
    }

    # Explicit undef (rather than a bare return) is kept on purpose so that
    # callers in list context still receive exactly one value.
    return undef unless defined $id_tag;

    # Only the first value of the tag is used.
    my ($gene_id) = $feature->get_tag_values($id_tag);

    # A tag without any value would otherwise be fed to s/// and eq below
    # and trigger "uninitialized value" warnings.
    return undef unless defined $gene_id;

    $gene_id =~ s!["']!!g;
    return undef if $gene_id eq "";
    return $gene_id;
}
77
78 no Moose;
79 __PACKAGE__->meta->make_immutable;
80
81 1;