Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary/GeneNamesFromGFF.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 package Bio::Roary::GeneNamesFromGFF; | |
2 | |
3 # ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name | |
4 | |
5 =head1 SYNOPSIS | |
6 | |
7 Parse a GFF and efficiently extract ID -> Gene Name | |
8 use Bio::Roary::GeneNamesFromGFF; | |
9 | |
10 my $obj = Bio::Roary::GeneNamesFromGFF->new( | |
11 gff_file => 'abc.gff' | |
12 ); | |
13 $obj->ids_to_gene_name; | |
14 | |
15 =cut | |
16 | |
17 use Moose; | |
18 | |
19 use Bio::Tools::GFF; | |
20 with 'Bio::Roary::ParseGFFAnnotationRole'; | |
21 | |
# Map of feature ID -> gene name. Built on first access by
# _build_ids_to_gene_name, which parses the GFF file.
has 'ids_to_gene_name' => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_ids_to_gene_name',
);

# Map of feature ID -> product annotation. Starts out empty; within this
# module it is only filled in as a side effect of _build_ids_to_gene_name,
# so access ids_to_gene_name first.
has 'ids_to_product' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);

# Map of feature ID -> (end - start) of the feature. Starts out empty and,
# like ids_to_product, is only filled in by _build_ids_to_gene_name.
has 'ids_to_gene_size' => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);
25 | |
26 # Parsing with the perl GFF module is exceptionally slow. | |
# Builder for the lazy 'ids_to_gene_name' attribute.
#
# Reads every feature from $self->gff_file with Bio::Tools::GFF (GFF
# version 3) and returns a hash reference mapping feature ID -> gene name.
# Features for which _get_feature_id yields no usable ID are skipped.
#
# Side effects: for every feature that has an ID, this also records
#   * $self->ids_to_product->{$id}   - first 'product' tag value, if present
#   * $self->ids_to_gene_size->{$id} - feature end minus feature start
sub _build_ids_to_gene_name {
    my ($self) = @_;

    my $product_for = $self->ids_to_product;
    my $size_for    = $self->ids_to_gene_size;

    my $parser = Bio::Tools::GFF->new(
        -file        => $self->gff_file,
        -gff_version => 3,
    );

    my %name_for;
  FEATURE:
    while ( my $feat = $parser->next_feature() ) {
        my $id = $self->_get_feature_id($feat);
        next FEATURE if !$id;

        # The gene name comes from the 'gene' tag, falling back to 'Name'
        # only when 'gene' is absent. The first tag that is present decides
        # the outcome: if its value is empty after removing double quotes,
        # no name is stored and the other tag is not consulted.
        for my $tag (qw(gene Name)) {
            next unless $feat->has_tag($tag);

            my ($name) = $feat->get_tag_values($tag);
            $name =~ s!"!!g;
            $name_for{$id} = $name if $name ne "";
            last;
        }

        if ( $feat->has_tag('product') ) {
            my @products = $feat->get_tag_values('product');
            $product_for->{$id} = $products[0];
        }

        # NOTE(review): stored as end - start with no +1; if the coordinates
        # are inclusive this is one less than the feature length - confirm
        # what consumers of ids_to_gene_size expect before changing it.
        $size_for->{$id} = $feat->end - $feat->start;
    }

    return \%name_for;
}
60 | |
# Work out the identifier of a GFF feature.
#
# Parameters:
#   $feature - object providing has_tag() and get_tag_values()
#
# Returns the first value of the 'ID' tag, or of the 'locus_tag' tag when
# there is no 'ID' tag, with all single and double quotes removed. Returns
# undef when neither tag is present or the cleaned value is empty.
#
# An explicit undef (rather than a bare return) is returned on purpose so
# that callers in list context still receive exactly one element.
sub _get_feature_id {
    my ( $self, $feature ) = @_;

    my $id_tag =
        $feature->has_tag('ID')        ? 'ID'
      : $feature->has_tag('locus_tag') ? 'locus_tag'
      :                                  undef;
    return undef unless defined $id_tag;

    my ($id) = $feature->get_tag_values($id_tag);
    $id =~ s!["']!!g;

    return $id eq "" ? undef : $id;
}
77 | |
78 no Moose; | |
79 __PACKAGE__->meta->make_immutable; | |
80 | |
81 1; |