diff Roary/lib/Bio/Roary/ChunkFastaFile.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Roary/lib/Bio/Roary/ChunkFastaFile.pm	Fri May 14 20:27:06 2021 +0000
@@ -0,0 +1,77 @@
+package Bio::Roary::ChunkFastaFile;
+
+# ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces.
+
+=head1 SYNOPSIS
+
+Take in a FASTA file and chunk it up into smaller pieces.
+   use Bio::Roary::ChunkFastaFile;
+   
+   my $obj = Bio::Roary::ChunkFastaFile->new(
+     fasta_file   => 'abc.fa',
+   );
+   $obj->sequence_file_names;
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Cwd;
+use File::Temp;
+
+has 'fasta_file'          => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'target_chunk_size'   => ( is => 'ro', isa => 'Int',      default  => 200000 );
+has 'sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy     => 1, builder => '_build_sequence_file_names' );
+has '_working_directory' =>
+  ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
+has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
+
+sub _build__working_directory_name {
+    my ($self) = @_;
+    return $self->_working_directory->dirname();
+}
+
+sub _build__input_seqio {
+    my ($self) = @_;
+    return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' );
+}
+
+sub _create_next_chunk_file_name {
+    my ( $self, $chunk_number ) = @_;
+    return join( '/', ( $self->_working_directory_name, $chunk_number . '.seq' ) );
+}
+
+sub _create_next_chunk_seqio {
+    my ( $self, $chunk_number ) = @_;
+    return Bio::SeqIO->new( -file => ">".$self->_create_next_chunk_file_name($chunk_number), -format => 'Fasta' );
+}
+
+sub _build_sequence_file_names {
+    my ($self) = @_;
+    my @sequence_file_names;
+    my $chunk_number         = 0;
+    my $current_chunk_length = 0;
+    my $current_chunk_seqio  = $self->_create_next_chunk_seqio($chunk_number);
+    push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) );
+
+    while ( my $input_seq = $self->_input_seqio->next_seq() ) {
+        if ( $current_chunk_length > $self->target_chunk_size ) {
+
+            # next chunk
+            $chunk_number++;
+            $current_chunk_length = 0;
+            $current_chunk_seqio  = $self->_create_next_chunk_seqio($chunk_number);
+            push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) );
+        }
+        $current_chunk_seqio->write_seq($input_seq);
+        $current_chunk_length += $input_seq->length();
+    }
+    return \@sequence_file_names;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;