0
|
1 package Bio::Roary::PresenceAbsenceMatrix;
|
|
2
|
|
3 # ABSTRACT: Create a matrix with presence and absence
|
|
4
|
|
=head1 SYNOPSIS

Create a matrix with presence and absence. Since it's computationally intensive to generate the inputs, calculate them once
in the GroupStatistics module and pass them through.

   use Bio::Roary::PresenceAbsenceMatrix;

   my $obj = Bio::Roary::PresenceAbsenceMatrix->new(
     annotate_groups_obj => $annotate_groups_obj,
     output_filename     => 'gene_presence_absence.Rtab',
     sorted_file_names   => $sorted_file_names,
     groups_to_files     => $groups_to_files,
     num_files_in_groups => $num_files_in_groups,
     sample_headers      => $sample_headers,
   );
   $obj->create_matrix_file;

=cut
|
|
22
|
|
23 use Moose;
|
|
24 use Text::CSV;
|
|
25 use Bio::SeqIO;
|
|
26 use Bio::Roary::Exceptions;
|
|
27 use Bio::Roary::AnnotateGroups;
|
|
28
|
|
# --- Constructor arguments -------------------------------------------------

# Annotation object; create_matrix_file reads the consensus gene name of each
# group from its _groups_to_consensus_gene_names hash.
has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );

# File names, in the order in which the presence/absence columns are written.
has 'sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', required => 1 );

# Two-level lookup, accessed as {group}{file name}; the value is dereferenced
# as an array ref, and a missing or empty value is reported as absent (0).
has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', required => 1 );

# group => number; its keys define the rows, ordered by this number (highest
# first). Presumably the number of files containing the group - confirm with caller.
has 'num_files_in_groups' => ( is => 'ro', isa => 'HashRef', required => 1 );

# Column titles written in the header row after the leading 'Gene' column.
has 'sample_headers' => ( is => 'ro', isa => 'ArrayRef', required => 1 );

# Path of the tab separated output file.
has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );

# --- Lazily built internals --------------------------------------------------

# Write handle on output_filename, opened the first time it is needed.
has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );

# Text::CSV writer used for every row of the output file.
has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
|
|
38
|
|
# Builder for the lazy '_output_fh' attribute.
#
# Opens the file named by output_filename for writing ('>' mode, so an existing
# file is truncated) and returns the lexical file handle.
#
# Throws Bio::Roary::Exceptions::CouldntWriteToFile when the file cannot be opened.
sub _build__output_fh {
    my ($self) = @_;

    # Read the attribute once so the open call and the error message agree.
    my $output_filename = $self->output_filename;

    open( my $fh, '>', $output_filename )
      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $output_filename );
    return $fh;
}
|
|
45
|
|
# Builder for the lazy '_text_csv_obj' attribute.
#
# Returns a Text::CSV writer configured for the matrix file: binary safe,
# fields quoted only when needed, tab as the separator and CRLF ("\r\n")
# appended to every printed row.
sub _build__text_csv_obj {
    my ($self) = @_;

    my %csv_options = (
        binary       => 1,
        always_quote => 0,
        sep_char     => "\t",
        eol          => "\r\n",
    );
    return Text::CSV->new( \%csv_options );
}
|
|
50
|
|
# Write the presence/absence matrix to the output file.
#
# The first row is 'Gene' followed by the sample headers. Then one row is
# written per key of num_files_in_groups, ordered by its value (highest first)
# and by group name for ties. Each row holds the group's consensus gene name
# followed by one cell per entry of sorted_file_names: 1 when groups_to_files
# has a non-empty gene list for that group and file, otherwise 0.
#
# Returns the object itself.
# Throws Bio::Roary::Exceptions::CouldntWriteToFile when the output handle
# cannot be closed cleanly (buffered write errors surface at close).
sub create_matrix_file {
    my ($self) = @_;

    my $csv = $self->_text_csv_obj;
    my $fh  = $self->_output_fh;

    # Header row. Build a new list instead of unshifting onto sample_headers:
    # that array ref belongs to the caller and must not gain a 'Gene' element.
    my @header = ( 'Gene', @{ $self->sample_headers } );
    $csv->print( $fh, \@header );

    my $num_files_in_groups = $self->num_files_in_groups;
    my @ordered_groups =
      sort { $num_files_in_groups->{$b} <=> $num_files_in_groups->{$a} || $a cmp $b } keys %{$num_files_in_groups};

    for my $group (@ordered_groups) {
        my @row = ( $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group} );

        for my $filename ( @{ $self->sorted_file_names } ) {
            my $group_to_file_genes = $self->groups_to_files->{$group}->{$filename};
            my $is_present = defined($group_to_file_genes) && @{$group_to_file_genes} > 0;
            push( @row, $is_present ? 1 : 0 );
        }
        $csv->print( $fh, \@row );
    }

    # A failed close on a write handle means data may not have reached the file.
    close($fh)
      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt close output file:" . $self->output_filename );
    return $self;
}
|
|
80
|
|
81 no Moose;
|
|
82 __PACKAGE__->meta->make_immutable;
|
|
83
|
|
84 1;
|