Mercurial > repos > nml > pseudogenome
comparison pseudogenome.pl @ 0:47b586ab4729 draft default tip
planemo upload commit 4fee4519135f7677cf50f721cf1ad7a7335ad66d-dirty
| author | nml |
|---|---|
| date | Fri, 06 Apr 2018 14:29:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:47b586ab4729 |
|---|---|
| 1 #!/usr/bin/env perl | |
| 2 use strict; | |
| 3 use warnings; | |
| 4 use autodie qw(:all); | |
| 5 use Bio::SeqIO; | |
| 6 use Readonly; | |
| 7 use File::Basename; | |
| 8 use Getopt::Long; | |
| 9 use Pod::Usage; | |
| 10 Getopt::Long::Configure('bundling'); | |
| 11 | |
| 12 =head1 NAME | |
| 13 | |
| 14 nml_pseudogenome.pl - To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file | |
| 15 | |
| 16 =head1 SYNOPSIS | |
| 17 | |
| 18 nml_pseudogenome.pl -i F<file_name.fna> -n 100 -c X -o F<filename.fna> | |
| 19 | |
| 20 =head1 OPTIONS | |
| 21 | |
| 22 =over | |
| 23 | |
| 24 =item B<-i>, B<--input> | |
| 25 | |
| 26 Multiple fasta file | |
| 27 | |
| 28 =item B<-n>, B<--number> | |
| 29 | |
| 30 Number of filler base pairs to be added, default : 10 | |
| 31 | |
| 32 =item B<-c>, B<--chars> | |
| 33 | |
| 34 Character to be used as the 'glue' between contigs, default : 'N' | |
| 35 | |
| 36 =item B<--id> | |
| 37 | |
| 38 Name of fasta file to be used default: pseudogenome | |
| 39 | |
| 40 =item B<-o>, B<--output> | |
| 41 | |
| 42 Output file name, default : Same as input | |
| 43 | |
| 44 =item B<-s>, B<--stitch> | |
| 45 | |
| 46 Add the stitch pattern between contigs only | |
| 47 | |
| 48 =item B<-h>, B<--help> | |
| 49 | |
| 50 Print this help | |
| 51 | |
| 52 =item EXAMPLE | |
| 53 | |
| 54 nml_pseudogenome.pl -i multiple_fasta.fna -n 100 -c X -o pseudo.fna | |
| 55 | |
| 56 nml_pseudogenome.pl -i another_multiple.fna | |
| 57 | |
| 58 =back | |
| 59 | |
| 60 =head1 DESCRIPTION | |
| 61 | |
| 62 To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file. | |
| 63 | |
| 64 =cut | |
| 65 | |
| 66 # Nonsub perlcode | |
| 67 | |
| 68 Readonly my $DEFAULT_NUM_CHAR => 10; | |
| 69 Readonly my $stitch_pattern => 'NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN'; | |
| 70 Readonly my $DEFAULT_CHAR => 'N'; | |
| 71 my ( $input,$id, $number, $char, $output,$stitch, $help ); | |
| 72 | |
| 73 GetOptions( | |
| 74 'i|input=s' => \$input, | |
| 75 'n|number=s' => \$number, | |
| 76 'c|char=s' => \$char, | |
| 77 'o|output=s' => \$output, | |
| 78 'h|help' => \$help, | |
| 79 's|stitch' => \$stitch, | |
| 80 'id=s' => \$id | |
| 81 ); | |
| 82 ($input,$id,$number,$char,$output) =check_inputs( $input, $number, $char,$output, $help,$stitch ); | |
| 83 | |
| 84 | |
| 85 | |
| 86 my $in = Bio::SeqIO->new(-file=>$input,-format=>'fasta'); | |
| 87 | |
| 88 my $sequence; | |
| 89 | |
| 90 #go thru every sequence and append to main sequence | |
| 91 while (my $seq = $in->next_seq()) { | |
| 92 if ($stitch) { | |
| 93 $sequence .= $seq->seq . $stitch_pattern; | |
| 94 } | |
| 95 else { | |
| 96 $sequence .= $seq->seq . ($char x $number ); | |
| 97 } | |
| 98 | |
| 99 } | |
| 100 | |
| 101 my $main = Bio::Seq->new(-display_id=>$id,-seq=>$sequence); | |
| 102 | |
| 103 my $out = Bio::SeqIO->new(-file => ">$output" ,-format=>'fasta'); | |
| 104 $out->write_seq($main); | |
| 105 | |
| 106 exit; | |
| 107 | |
| 108 =begin HTML | |
| 109 | |
| 110 =head2 check_inputs | |
| 111 | |
| 112 Title : check_inputs | |
| 113 Usage : check_inputs($fasta,$num,$filler,$out_to,$usage); | |
| 114 Function: check arguments provided by the user to see if they are usable and more or less correct | |
| 115 Returns : Return 1 if all is correct,otherwise 0 | |
| 116 Args : $query: Query that we are looking for in the database. Could be accession number or locus_tag | |
| 117 $db: Name of database we are looking for using the query provided | |
| 118 $format: Ensure that format was given by user and is valid format | |
| 119 $usage: If true, return usage description | |
| 120 Throws : none | |
| 121 | |
| 122 =cut | |
| 123 | |
| 124 sub check_inputs { | |
| 125 my ( $fasta, $num, $filler, $out_to, $usage,$use_stitch ) = @_; | |
| 126 | |
| 127 if ( $help || !( $fasta || $num || $filler || $out_to ) ) { | |
| 128 pod2usage(); | |
| 129 exit; | |
| 130 } | |
| 131 | |
| 132 if ( !($fasta) || !( -e $fasta ) ) { | |
| 133 print STDERR "Error: Input file not given or does not exist\n"; | |
| 134 pod2usage(); | |
| 135 exit; | |
| 136 } | |
| 137 | |
| 138 if ($use_stitch) { | |
| 139 print "Using stitch pattern\n"; | |
| 140 | |
| 141 } | |
| 142 else { | |
| 143 if ( !$num ) { | |
| 144 $num = $DEFAULT_NUM_CHAR; | |
| 145 print STDERR "Number of character not given, using $num\n"; | |
| 146 } | |
| 147 elsif ( !( $num =~ /^\d+$/xms ) ) { | |
| 148 print STDERR "Error: Number of character was not a number\n"; | |
| 149 pod2usage(); | |
| 150 exit; | |
| 151 } | |
| 152 | |
| 153 if ( !$filler ) { | |
| 154 $filler = $DEFAULT_CHAR; | |
| 155 print STDERR "No filler character given, using 'N'\n"; | |
| 156 } | |
| 157 | |
| 158 } | |
| 159 | |
| 160 if ( !($out_to) ) { | |
| 161 $out_to = fileparse($fasta) . ".pseudogenome"; | |
| 162 print | |
| 163 "Output file was not given. Result will be written to '$out_to'\n"; | |
| 164 } | |
| 165 if ( ! $id) { | |
| 166 $id = 'pseudogenome'; | |
| 167 } | |
| 168 | |
| 169 return ( $fasta,$id, $num, $filler, $out_to ); | |
| 170 } | |
| 171 | |
| 172 =end HTML | |
| 173 | |
| 174 =head1 SEE ALSO | |
| 175 | |
| 176 No related files. | |
| 177 | |
| 178 =head1 AUTHOR | |
| 179 | |
| 180 Philip Mabon, <philip.mabon@canada.ca> | |
| 181 | |
| 182 =head1 BUGS | |
| 183 | |
| 184 None reported. | |
| 185 | |
| 186 =head1 COPYRIGHT & LICENSE | |
| 187 | |
| 188 Copyright (C) 2018 by Public Health Agency of Canada | |
| 189 | |
| 190 This program is free software; you can redistribute it and/or modify | |
| 191 it under the same terms as Perl itself, either Perl version 5.8.2 or, | |
| 192 at your option, any later version of Perl 5 you may have available. | |
| 193 | |
| 194 =head1 DEVELOPER PAGE | |
| 195 | |
| 196 No developer documentation. | |
| 197 | |
| 198 =cut |
