annotate Roary/lib/Bio/Roary/PresenceAbsenceMatrix.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::PresenceAbsenceMatrix;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Create a matrix with presence and absence
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 Create a matrix with presence and absence. Since it's computationally intensive to generate the inputs, calculate them once
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 in the GroupStatistics module and pass them through.
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9 use Bio::Roary::PresenceAbsenceMatrix;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11 my $obj = Bio::Roary::PresenceAbsenceMatrix->new(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 annotate_groups_obj => $annotate_groups_obj,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 output_filename => 'gene_presence_absence.Rtab',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 sorted_file_names => $sorted_file_names,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 groups_to_files => $groups_to_files,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 num_files_in_groups => $num_files_in_groups,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 sample_headers => $sample_headers,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 $obj->create_matrix_file;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
23 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
24 use Text::CSV;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
25 use Bio::SeqIO;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
26 use Bio::Roary::Exceptions;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
27 use Bio::Roary::AnnotateGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
28
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# --- Required inputs (see SYNOPSIS: computed once upstream and passed in) ---

# Supplies the consensus gene name written in the first column of each row.
has 'annotate_groups_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);

# File names in the order their presence/absence columns are written.
has 'sorted_file_names' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# group => { filename => array ref of genes }; a non-empty array means present.
has 'groups_to_files' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);

# group => numeric count; rows are written in descending order of this value.
has 'num_files_in_groups' => (
    is       => 'ro',
    isa      => 'HashRef',
    required => 1,
);

# Column headings written after the leading 'Gene' heading.
has 'sample_headers' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# --- Optional ---

# Path of the matrix file to create.
has 'output_filename' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'gene_presence_absence.Rtab',
);

# --- Internal, lazily built ---

# Write handle on output_filename.
has '_output_fh' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build__output_fh',
);

# Tab-separated writer used for every row.
has '_text_csv_obj' => (
    is      => 'ro',
    isa     => 'Text::CSV',
    lazy    => 1,
    builder => '_build__text_csv_obj',
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
38
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the lazy _output_fh attribute.
#
# Opens output_filename for writing (truncating any existing file) and
# returns the lexical file handle. Throws
# Bio::Roary::Exceptions::CouldntWriteToFile when the file cannot be opened.
sub _build__output_fh {
    my $self = shift;

    my $path = $self->output_filename;
    my $handle;
    unless ( open( $handle, '>', $path ) ) {
        Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
    }
    return $handle;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
45
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the lazy _text_csv_obj attribute.
#
# Returns a Text::CSV writer configured for tab-separated output: binary-safe,
# fields not force-quoted, and every row terminated with CRLF.
sub _build__text_csv_obj {
    my $self = shift;

    my %csv_options = (
        binary       => 1,
        always_quote => 0,
        sep_char     => "\t",
        eol          => "\r\n",
    );
    return Text::CSV->new( \%csv_options );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
50
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Write the gene presence/absence matrix to output_filename.
#
# The first row is 'Gene' followed by the sample headers. Every following row
# holds the consensus gene name of one group and then one column per entry in
# sorted_file_names: 1 when groups_to_files lists at least one gene for that
# group in that file, 0 otherwise. Groups are written in descending order of
# their num_files_in_groups value, ties broken by group name.
#
# Returns $self.
# Throws Bio::Roary::Exceptions::CouldntWriteToFile if the output file cannot
# be opened (via the lazy _output_fh builder) or cannot be closed cleanly.
sub create_matrix_file {
    my ($self) = @_;

    my $csv       = $self->_text_csv_obj;
    my $output_fh = $self->_output_fh;

    # Header row. Build a fresh list instead of unshifting onto
    # sample_headers: that array ref is owned by the caller, and modifying it
    # in place would leak 'Gene' into every other user of the same list and
    # add it again on each call.
    my @header = ( 'Gene', @{ $self->sample_headers } );
    $csv->print( $output_fh, \@header );

    # Fetch the lookup tables once rather than on every comparison/iteration.
    my $num_files_in_groups = $self->num_files_in_groups;
    my $groups_to_files     = $self->groups_to_files;
    my $consensus_names     = $self->annotate_groups_obj->_groups_to_consensus_gene_names;

    my @ordered_groups =
      sort { $num_files_in_groups->{$b} <=> $num_files_in_groups->{$a} || $a cmp $b }
      keys %{$num_files_in_groups};

    for my $group (@ordered_groups) {
        my @row = ( $consensus_names->{$group} );

        for my $filename ( @{ $self->sorted_file_names } ) {
            my $group_to_file_genes = $groups_to_files->{$group}->{$filename};
            my $is_present = defined($group_to_file_genes) && @{$group_to_file_genes} > 0;
            push( @row, $is_present ? 1 : 0 );
        }
        $csv->print( $output_fh, \@row );
    }

    # Buffered write errors (e.g. a full disk) only surface at close, so an
    # unchecked close could silently leave a truncated matrix behind.
    close($output_fh)
      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );

    return $self;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
80
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
81 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
82 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
83
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
84 1;