Mercurial > repos > nml > pseudogenome
diff pseudogenome.pl @ 0:47b586ab4729 draft default tip
planemo upload commit 4fee4519135f7677cf50f721cf1ad7a7335ad66d-dirty
author | nml |
---|---|
date | Fri, 06 Apr 2018 14:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pseudogenome.pl Fri Apr 06 14:29:17 2018 -0400 @@ -0,0 +1,198 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use autodie qw(:all); +use Bio::SeqIO; +use Readonly; +use File::Basename; +use Getopt::Long; +use Pod::Usage; +Getopt::Long::Configure('bundling'); + +=head1 NAME + +nml_pseudogenome.pl - Create a single pseudogenome out of multiple contigs provided in a single FASTA file. Contigs are combined in their order of appearance in the file + +=head1 SYNOPSIS + +nml_pseudogenome.pl -i F<file_name.fna> -n 100 -c X -o F<filename.fna> + +=head1 OPTIONS + +=over + +=item B<-i>, B<--input> + +Multi-FASTA input file + +=item B<-n>, B<--number> + +Number of filler base pairs to be added, default : 10 + +=item B<-c>, B<--char> + +Character to be used as the 'glue' between contigs, default : 'N' + +=item B<--id> + +Sequence identifier to be used in the FASTA header of the pseudogenome, default : pseudogenome + +=item B<-o>, B<--output> + +Output file name, default : base name of the input file with '.pseudogenome' appended + +=item B<-s>, B<--stitch> + +Use the fixed stitch pattern as the glue between contigs instead of the filler characters + +=item B<-h>, B<--help> + +Print this help + +=item EXAMPLE + +nml_pseudogenome.pl -i multiple_fasta.fna -n 100 -c X -o pseudo.fna + +nml_pseudogenome.pl -i another_multiple.fna + +=back + +=head1 DESCRIPTION + +Create a single pseudogenome out of multiple contigs provided in a single FASTA file. Contigs are combined in their order of appearance in the file. 

=cut

# Nonsub perlcode

# Defaults for the filler ("glue") inserted after each contig, and the fixed
# pattern used instead of the filler when --stitch is given.
Readonly my $DEFAULT_NUM_CHAR => 10;
Readonly my $stitch_pattern   => 'NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN';
Readonly my $DEFAULT_CHAR     => 'N';
my ( $input, $id, $number, $char, $output, $stitch, $help );

# Stop with the usage text on unknown or malformed options instead of
# silently ignoring them.  'chars' is kept as an alias of 'char' so that
# either spelling of the long option works.
GetOptions(
    'i|input=s'      => \$input,
    'n|number=s'     => \$number,
    'c|char|chars=s' => \$char,
    'o|output=s'     => \$output,
    'h|help'         => \$help,
    's|stitch'       => \$stitch,
    'id=s'           => \$id,
) or pod2usage( -exitval => 2 );

( $input, $id, $number, $char, $output ) =
  check_inputs( $input, $number, $char, $output, $help, $stitch, $id );

my $in = Bio::SeqIO->new( -file => $input, -format => 'fasta' );

# The glue does not change between records, so build it once: the stitch
# pattern with --stitch, otherwise the filler character repeated $number times.
my $glue = $stitch ? $stitch_pattern : $char x $number;

# Go through every record in file order and append it to the main sequence.
# The glue is appended after every contig, including the last one.
my $sequence;
while ( my $seq = $in->next_seq() ) {
    $sequence .= $seq->seq . $glue;
}

my $main = Bio::Seq->new( -display_id => $id, -seq => $sequence );

my $out = Bio::SeqIO->new( -file => ">$output", -format => 'fasta' );
$out->write_seq($main);

exit;

=begin HTML

=head2 check_inputs

 Title   : check_inputs
 Usage   : ($fasta, $id, $num, $filler, $out_to) =
               check_inputs($fasta, $num, $filler, $out_to, $usage, $use_stitch, $seq_id);
 Function: Validate the command-line arguments and fill in the defaults for
           every optional value that was not supplied
 Returns : The list ($fasta, $seq_id, $num, $filler, $out_to) with defaults
           applied. $num and $filler are returned untouched (possibly
           undefined) when $use_stitch is true
 Args    : $fasta:      Path of the input FASTA file; must exist
           $num:        Number of filler characters; digits only,
                        defaults to 10
           $filler:     Filler character, defaults to 'N'
           $out_to:     Output file name, defaults to the base name of
                        $fasta with '.pseudogenome' appended
           $usage:      If true, print the help text and exit
           $use_stitch: If true, $num and $filler are neither validated
                        nor defaulted
           $seq_id:     Optional identifier of the output sequence,
                        defaults to the value of the --id option and then
                        to 'pseudogenome'
 Throws  : none; on invalid input the usage text is printed and the
           program exits with status 2

=cut

sub check_inputs {
    my ( $fasta, $num, $filler, $out_to, $usage, $use_stitch, $seq_id ) = @_;

    # Backward compatibility: callers that pass only the first six arguments
    # still pick up the value of the file-level --id option.
    $seq_id = $id if !defined $seq_id;

    # Help was asked for explicitly: show the option list and report success.
    if ($usage) {
        pod2usage( -verbose => 1, -exitval => 0 );
    }

    # Nothing usable on the command line at all: show the synopsis.
    # Values are tested with defined/length so that a literal 0 counts as given.
    my $given = grep { defined $_ && length $_ }
      ( $fasta, $num, $filler, $out_to );
    if ( !$given ) {
        pod2usage( -exitval => 2 );
    }

    if ( !defined $fasta || !length $fasta || !( -e $fasta ) ) {
        print STDERR "Error: Input file not given or does not exist\n";
        pod2usage( -exitval => 2 );
    }

    if ($use_stitch) {
        print "Using stitch pattern\n";
    }
    else {
        if ( !defined $num || !length $num ) {
            $num = $DEFAULT_NUM_CHAR;
            print STDERR "Number of character not given, using $num\n";
        }
        elsif ( $num !~ /\A\d+\z/xms ) {

            # \A...\z rather than ^...$ so a trailing newline is rejected too.
            print STDERR "Error: Number of character was not a number\n";
            pod2usage( -exitval => 2 );
        }

        if ( !defined $filler || !length $filler ) {
            $filler = $DEFAULT_CHAR;
            print STDERR "No filler character given, using '$filler'\n";
        }
    }

    if ( !defined $out_to || !length $out_to ) {

        # Only the file-name component of the input path is used, so the
        # result is written relative to the current working directory.
        my ($file_name) = fileparse($fasta);
        $out_to = $file_name . ".pseudogenome";
        print
          "Output file was not given. Result will be written to '$out_to'\n";
    }

    if ( !defined $seq_id || !length $seq_id ) {
        $seq_id = 'pseudogenome';
    }

    return ( $fasta, $seq_id, $num, $filler, $out_to );
}

=end HTML

=head1 SEE ALSO

No related files.

=head1 AUTHOR

Philip Mabon, <philip.mabon@canada.ca>

=head1 BUGS

None reported.

=head1 COPYRIGHT & LICENSE

Copyright (C) 2018 by Public Health Agency of Canada

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 DEVELOPER PAGE

No developer documentation.

=cut