Mercurial > repos > dereeper > roary_plots
diff Roary/lib/Bio/Roary/Output/DifferenceBetweenSets.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Roary/lib/Bio/Roary/Output/DifferenceBetweenSets.pm Fri May 14 20:27:06 2021 +0000 @@ -0,0 +1,126 @@ +package Bio::Roary::Output::DifferenceBetweenSets; + +# ABSTRACT: Given two sets of isolates and a group file, output whats unique in each and whats in common + +=head1 SYNOPSIS + +Given two sets of isolates and a group file, output whats unique in each and whats in common + use Bio::Roary::Output::DifferenceBetweenSets; + + my $obj = Bio::Roary::Output::DifferenceBetweenSets->new( + analyse_groups => $analyse_groups, + input_filenames_sets => + [ + ['aaa.faa','bbb.faa'], + ['ccc.faa','ddd.faa'] + ], + ); + $obj->groups_set_one_unique(); + $obj->groups_set_two_unique(); + $obj->groups_in_common(); + +=cut + +use Moose; +use Bio::SeqIO; +use Bio::Roary::Exceptions; +use Bio::Roary::AnalyseGroups; +use Bio::Roary::Output::QueryGroups; + +has 'analyse_groups' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 ); +has 'input_filenames_sets' => ( is => 'ro', isa => 'ArrayRef[ArrayRef]', required => 1 ); +has 'output_filename_base' => ( is => 'ro', isa => 'Str', default => 'set_difference' ); + +has '_query_groups_objs' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__query_groups_objs' ); + +# TODO: update to handle more than 2 input sets + +sub _build__query_groups_objs { + my ($self) = @_; + my @query_groups_objs; + for my $file_name_set ( @{ $self->input_filenames_sets } ) { + push( + @query_groups_objs, + Bio::Roary::Output::QueryGroups->new( + analyse_groups => $self->analyse_groups, + input_filenames => $file_name_set + ) + ); + } + + my @all_input_files = (@{ $self->input_filenames_sets->[0] },@{ $self->input_filenames_sets->[1] }); + push( + @query_groups_objs, + Bio::Roary::Output::QueryGroups->new( + analyse_groups => $self->analyse_groups, + input_filenames => \@all_input_files + ) + ); + + + return \@query_groups_objs; +} + +sub _subtract_arrays { + my ( $self, $array_1, $array_2 ) = @_; + my %array_1 = map { $_ => 1 } @{$array_1}; + my @difference = grep { not $array_1{$_} } @{$array_2}; + return \@difference; +} + +sub _groups_unique { + my ( $self, $output_filename, $query_group1, $query_group2 ) = @_; + my $unique_groups = $self->_subtract_arrays( $query_group2->_groups, $query_group1->_groups ); + $query_group1->groups_with_external_inputs( $output_filename, $unique_groups ); +} + +sub groups_set_one_unique_filename +{ + my ($self) = @_; + return $self->output_filename_base . '_unique_set_one'; +} + +sub groups_set_two_unique_filename +{ + my ($self) = @_; + return $self->output_filename_base . '_unique_set_two'; +} + +sub groups_in_common_filename +{ + my ($self) = @_; + return $self->output_filename_base . '_common_set'; +} + + +sub groups_set_one_unique { + my ($self) = @_; + $self->_groups_unique( + $self->groups_set_one_unique_filename, + $self->_query_groups_objs->[0], + $self->_query_groups_objs->[1] + ); +} + +sub groups_set_two_unique { + my ($self) = @_; + $self->_groups_unique( + $self->groups_set_two_unique_filename, + $self->_query_groups_objs->[1], + $self->_query_groups_objs->[0] + ); +} + +sub groups_in_common { + my ($self) = @_; + my $unique_group_1 = $self->_subtract_arrays( $self->_query_groups_objs->[0]->_groups, $self->_query_groups_objs->[1]->_groups ); + my $unique_group_2 = $self->_subtract_arrays( $self->_query_groups_objs->[1]->_groups, $self->_query_groups_objs->[0]->_groups ); + my $common_groups_1 = $self->_subtract_arrays( $unique_group_1,$self->_query_groups_objs->[2]->_groups); + my $common_groups_2 = $self->_subtract_arrays( $unique_group_2,$common_groups_1); + $self->_query_groups_objs->[2]->groups_with_external_inputs( $self->groups_in_common_filename, $common_groups_2 ); +} + +no Moose; +__PACKAGE__->meta->make_immutable; + +1;