comparison pseudogenome.pl @ 0:47b586ab4729 draft default tip

planemo upload commit 4fee4519135f7677cf50f721cf1ad7a7335ad66d-dirty
author nml
date Fri, 06 Apr 2018 14:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:47b586ab4729
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use autodie qw(:all);
5 use Bio::SeqIO;
6 use Readonly;
7 use File::Basename;
8 use Getopt::Long;
9 use Pod::Usage;
10 Getopt::Long::Configure('bundling');
11
12 =head1 NAME
13
14 nml_pseudogenome.pl - To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file
15
16 =head1 SYNOPSIS
17
18 nml_pseudogenome.pl -i F<file_name.fna> -n 100 -c X -o F<filename.fna>
19
20 =head1 OPTIONS
21
22 =over
23
24 =item B<-i>, B<--input>
25
26 Multiple fasta file
27
28 =item B<-n>, B<--number>
29
30 Number of filler base pairs to be added, default : 10
31
32 =item B<-c>, B<--chars>
33
34 Character to be used as the 'glue' between contigs, default : 'N'
35
36 =item B<--id>
37
38 Name of fasta file to be used default: pseudogenome
39
40 =item B<-o>, B<--output>
41
42 Output file name, default : Same as input
43
44 =item B<-s>, B<--stitch>
45
46 Add the stitch pattern between contigs only
47
48 =item B<-h>, B<--help>
49
50 Print this help
51
52 =item EXAMPLE
53
54 nml_pseudogenome.pl -i multiple_fasta.fna -n 100 -c X -o pseudo.fna
55
56 nml_pseudogenome.pl -i another_multiple.fna
57
58 =back
59
60 =head1 DESCRIPTION
61
62 To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file.
63
64 =cut
65
66 # Nonsub perlcode
67
68 Readonly my $DEFAULT_NUM_CHAR => 10;
69 Readonly my $stitch_pattern => 'NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN';
70 Readonly my $DEFAULT_CHAR => 'N';
71 my ( $input,$id, $number, $char, $output,$stitch, $help );
72
73 GetOptions(
74 'i|input=s' => \$input,
75 'n|number=s' => \$number,
76 'c|char=s' => \$char,
77 'o|output=s' => \$output,
78 'h|help' => \$help,
79 's|stitch' => \$stitch,
80 'id=s' => \$id
81 );
82 ($input,$id,$number,$char,$output) =check_inputs( $input, $number, $char,$output, $help,$stitch );
83
84
85
86 my $in = Bio::SeqIO->new(-file=>$input,-format=>'fasta');
87
88 my $sequence;
89
90 #go thru every sequence and append to main sequence
91 while (my $seq = $in->next_seq()) {
92 if ($stitch) {
93 $sequence .= $seq->seq . $stitch_pattern;
94 }
95 else {
96 $sequence .= $seq->seq . ($char x $number );
97 }
98
99 }
100
101 my $main = Bio::Seq->new(-display_id=>$id,-seq=>$sequence);
102
103 my $out = Bio::SeqIO->new(-file => ">$output" ,-format=>'fasta');
104 $out->write_seq($main);
105
106 exit;
107
108 =begin HTML
109
110 =head2 check_inputs
111
112 Title : check_inputs
113 Usage : check_inputs($fasta,$num,$filler,$out_to,$usage);
114 Function: check arguments provided by the user to see if they are usable and more or less correct
115 Returns : Return 1 if all is correct,otherwise 0
116 Args : $query: Query that we are looking for in the database. Could be accession number or locus_tag
117 $db: Name of database we are looking for using the query provided
118 $format: Ensure that format was given by user and is valid format
119 $usage: If true, return usage description
120 Throws : none
121
122 =cut
123
124 sub check_inputs {
125 my ( $fasta, $num, $filler, $out_to, $usage,$use_stitch ) = @_;
126
127 if ( $help || !( $fasta || $num || $filler || $out_to ) ) {
128 pod2usage();
129 exit;
130 }
131
132 if ( !($fasta) || !( -e $fasta ) ) {
133 print STDERR "Error: Input file not given or does not exist\n";
134 pod2usage();
135 exit;
136 }
137
138 if ($use_stitch) {
139 print "Using stitch pattern\n";
140
141 }
142 else {
143 if ( !$num ) {
144 $num = $DEFAULT_NUM_CHAR;
145 print STDERR "Number of character not given, using $num\n";
146 }
147 elsif ( !( $num =~ /^\d+$/xms ) ) {
148 print STDERR "Error: Number of character was not a number\n";
149 pod2usage();
150 exit;
151 }
152
153 if ( !$filler ) {
154 $filler = $DEFAULT_CHAR;
155 print STDERR "No filler character given, using 'N'\n";
156 }
157
158 }
159
160 if ( !($out_to) ) {
161 $out_to = fileparse($fasta) . ".pseudogenome";
162 print
163 "Output file was not given. Result will be written to '$out_to'\n";
164 }
165 if ( ! $id) {
166 $id = 'pseudogenome';
167 }
168
169 return ( $fasta,$id, $num, $filler, $out_to );
170 }
171
172 =end HTML
173
174 =head1 SEE ALSO
175
176 No related files.
177
178 =head1 AUTHOR
179
180 Philip Mabon, <philip.mabon@canada.ca>
181
182 =head1 BUGS
183
184 None reported.
185
186 =head1 COPYRIGHT & LICENSE
187
188 Copyright (C) 2018 by Public Health Agency of Canada
189
190 This program is free software; you can redistribute it and/or modify
191 it under the same terms as Perl itself, either Perl version 5.8.2 or,
192 at your option, any later version of Perl 5 you may have available.
193
194 =head1 DEVELOPER PAGE
195
196 No developer documentation.
197
198 =cut