0
|
1 #!/usr/bin/perl
|
|
2
|
|
3 #
|
|
4 # convertFastaHeaders.pl
|
|
5 #
|
|
6 # $Id: ConvertFastaHeaders.pl 44 2010-10-18 12:58:41Z pieter.neerincx@gmail.com $
|
|
7 # $URL: https://trac.nbic.nl/svn/galaxytools/trunk/tools/general/FastaTools/ConvertFastaHeaders.pl $
|
|
8 # $LastChangedDate: 2010-10-18 07:58:41 -0500 (Mon, 18 Oct 2010) $
|
|
9 # $LastChangedRevision: 44 $
|
|
10 # $LastChangedBy: pieter.neerincx@gmail.com $
|
|
11 #
|
|
12 # Converts sequence header of FASTA files (in various customisable ways).
|
|
13 #
|
|
14
|
|
#
# Initialize environment
#
18 use strict;
|
|
19 use Getopt::Std;
|
|
20 use Log::Log4perl qw(:easy);
|
|
21
|
|
# Log level names accepted with -l, mapped to the Log::Log4perl priority
# constants imported through the :easy tag.
my %log_levels = (
    'ALL'   => $ALL,
    'TRACE' => $TRACE,
    'DEBUG' => $DEBUG,
    'INFO'  => $INFO,
    'WARN'  => $WARN,
    'ERROR' => $ERROR,
    'FATAL' => $FATAL,
    'OFF'   => $OFF,
);

#
# Get options.
#
my %opts;
Getopt::Std::getopts('i:o:l:e:f:n:a:p:', \%opts);
my $input     = $opts{'i'};    # -i: input file or directory
my $output    = $opts{'o'};    # -o: output file or directory
my $log_level = $opts{'l'};    # -l: log level name (key of %log_levels)
my $extension = $opts{'e'};    # -e: file name extension used in directory mode
# -f: whitespace separated list of prefixes / suffixes.
# Splitting on the special pattern ' ' skips leading whitespace, so a value
# like ' a b' does not produce an empty first element; a missing -f simply
# gives an empty list (which the validation further down reports).
my @x_fixes_array = defined($opts{'f'}) ? split(' ', $opts{'f'}) : ();
my $new_x_fix = $opts{'n'};    # -n: replacement prefix / suffix
my $action    = $opts{'a'};    # -a: add, strip, replace, delete or shuffle
my $position  = $opts{'p'};    # -p: prefix/suffix keyword or list of ID ranks
my %ids_to_delete;             # filled for action 'delete': rank => 'del'
my @new_id_order;              # filled for action 'shuffle': ranks in new order
48
|
|
#
# Configure logging.
#
# Fall back to WARN when no log level was given on the command line.
unless (defined($log_level)) {
    $log_level = 'WARN';
}
# Translate the level name into its Log4perl priority.
# Names that are not listed in %log_levels also fall back to WARN.
if (defined($log_levels{$log_level})) {
    $log_level = $log_levels{$log_level};
}
else {
    $log_level = $log_levels{'WARN'};
}
# All log output goes to STDERR; each message is prefixed with a timestamp,
# the source line number and the priority.
Log::Log4perl->easy_init(
    {
        level  => $log_level,
        file   => "STDERR",
        layout => '%d L:%L %p> %m%n',
    },
);
my $logger = Log::Log4perl::get_logger();
66
|
|
#
# Start the conversion process.
#
$logger->info("Starting...");

#
# Check user input.
# Every failed check logs a fatal message and ends the script through
# _Usage() (which exits) or a plain exit.
#

# Provides default if user did not specify action:
$action = (defined($action) ? $action : 'add');

# Check for valid action and action specific options.
if ($action eq 'add' || $action eq 'strip' || $action eq 'replace') {

    # These actions operate on the prefixes / suffixes given with -f.
    unless (scalar(@x_fixes_array) > 0) {
        $logger->fatal('No prefixes or suffixes specified.');
        _Usage();
    }

    # Replacing additionally needs the new prefix / suffix from -n.
    if ($action eq 'replace') {
        unless (defined($new_x_fix) && $new_x_fix ne '') {
            $logger->fatal('No new prefix or suffix specified to replace the existing ones.');
            _Usage();
        }
    }

    # Provides default if user did not specify position:
    $position = (defined($position) ? $position : 'prefix');
    # Check for valid position.
    if ($action eq 'add' || $action eq 'strip') {
        unless ($position eq 'prefix' || $position eq 'suffix') {
            $logger->fatal('Illegal position specified. Must be \'prefix\' or \'suffix\'.');
            _Usage();
        }
    } elsif ($action eq 'replace') {
        unless ($position eq 'prefix' || $position eq 'suffix' || $position eq 'pre2suf' || $position eq 'suf2pre') {
            $logger->fatal('Illegal position specified. Must be \'prefix\', \'suffix\', \'pre2suf\' or \'suf2pre\'.');
            _Usage();
        }
    }

} elsif ($action eq 'delete' || $action eq 'shuffle') {

    # For these actions the position is a comma separated list of 1-based ID ranks.
    unless (defined($position) && $position ne '') {
        $logger->fatal('No position specified.');
        _Usage();
    }

    my @id_indices = split(/,/, $position);

    # Check if the value is a number.
    foreach my $index_number (@id_indices) {

        unless ($index_number =~ m/^[1-9][0-9]*$/) {
            $logger->fatal('Illegal character in position list. Must be a single positive integer or comma separated list of positive integers.');
            _Usage();
        }

        if ($action eq 'delete') {
            # Remember the rank of each ID that must be dropped.
            $ids_to_delete{$index_number} = 'del';
        } elsif ($action eq 'shuffle') {
            # Remember the ranks in the requested new order.
            push(@new_id_order, $index_number);
        }
    }

} else {
    $logger->fatal('Illegal action specified. Must be add, strip, replace, delete or shuffle.');
    _Usage();
}

# Note: the log level is not resolved a second time here. It was already
# translated into a numeric priority when logging was configured; looking
# that number up in %log_levels again would only reset it to WARN.

# Provide default if user did not specify fasta filename extension:
$extension = (defined($extension) ? $extension : 'fa');

# Input and output are both mandatory.
unless (defined($input) && $input ne '' && defined($output) && $output ne '') {
    $logger->fatal('No input and/or output specified.');
    _Usage();
}
if ($input eq $output) {
    $logger->fatal("Output dir/file is the same as the input dir/file. Please choose a different one.");
    exit;
}
161
|
|
#
# Check if input is a single file or a directory.
#
unless (-e $input && -r $input) {
    $logger->fatal("Input $input does not exist or is not readable: $!");
    exit;
}

if (-f $input) {

    #
    # We've got an input file.
    # Strip the directory part; the bare file name is only used for logging.
    #
    my $file = $input;
    if ($input =~ m/(.+\/)([^\/]+)$/) {
        $file = $2;
    }

    $logger->info('Parsing ' . $file . "...\n");

    _ConvertFastaHeaders($input, $output, $action, \@x_fixes_array, $new_x_fix, $position, \%ids_to_delete, \@new_id_order);

    $logger->info('Converted ' . $file);

} else {

    #
    # We've got an input directory.
    # Assume the output is also a directory.
    # Append trailing path separators if they were missing.
    #
    unless ($input =~ m/\/$/) {
        $input = $input . '/';
    }
    unless ($output =~ m/\/$/) {
        $output = $output . '/';
    }

    #
    # Make sure the input dir is a directory.
    #
    unless (-d $input) {
        $logger->fatal("Input $input is not a file nor a directory: $!");
        exit;
    }
    my $indir  = $input;
    my $outdir = $output;

    #
    # Get all FASTA files from the input dir.
    #
    my $files = _GetFiles($indir, $outdir, $extension);

    #
    # Create the output directory if it did not exist yet.
    #
    if (-e $outdir && -d $outdir) {
        unless (-w $outdir) {
            $logger->fatal("Cannot write to output directory $outdir. Check for permission errors, read-only file systems, etc.");
            exit;
        }
    } else {
        $logger->info("Creating output directory $outdir...");
        # mkdir() does not die on failure: it returns false and sets $!,
        # so the return value has to be checked (an eval block sees nothing).
        unless (mkdir($outdir)) {
            $logger->fatal("Cannot create output directory $outdir: $!");
            exit;
        }
    }

    #
    # Convert FASTA files.
    #
    foreach my $file (@{$files}) {

        $logger->info('Parsing ' . $file . "...\n");

        # Each file is read from the input dir and written under the same
        # name to the output dir.
        my $pathfrom = $indir . $file;
        my $pathto   = $outdir . $file;

        # Pass the per-file paths, not the directories themselves.
        _ConvertFastaHeaders($pathfrom, $pathto, $action, \@x_fixes_array, $new_x_fix, $position, \%ids_to_delete, \@new_id_order);

        $logger->info('Converted ' . $file);

    }
}

$logger->info('Finished!');
257
|
|
258 #
|
|
259 ##
|
|
260 ### Internal subs.
|
|
261 ##
|
|
262 #
|
|
263
|
|
#
# Lists the entries of the input directory that look like FASTA files.
#
# Arguments:
#   $indir      Directory to scan.
#   $outdir     Output directory. An entry of $indir whose name equals the
#               last path component of $outdir is skipped, so the output
#               directory is not processed when it lives inside $indir.
#   $extension  File name extension. It is interpolated into a case
#               insensitive, unanchored pattern, so 'fa' also selects
#               names like 'x.fasta'.
#
# Returns a reference to an array with the selected entry names (without
# the directory part). Hidden entries (names starting with a dot) are skipped.
# Logs a fatal message and exits when $indir cannot be opened.
#
sub _GetFiles {

    my ($indir, $outdir, $extension) = @_;

    #
    # Get the last path component of the outdir.
    # Only use the capture when the match succeeded; otherwise $1 would
    # still hold whatever an earlier, unrelated match left behind.
    #
    my $outdir_rel;
    if ($outdir =~ m/([^\/]+)\/*$/) {
        $outdir_rel = $1;
    }

    #
    # Get all matching entries from the input dir.
    # opendir() does not die on failure, so check its return value.
    #
    my $dir_handle;
    unless (opendir($dir_handle, $indir)) {
        $logger->fatal("Cannot read files from input directory $indir: $!");
        exit;
    }
    my @files = grep {
        /.+\.$extension/i
            and not /^\..*/
            # Compare names literally: a regex match on the outdir name
            # would also drop every file that merely contains that name.
            and not (defined($outdir_rel) and $_ eq $outdir_rel)
    } readdir($dir_handle);
    closedir($dir_handle);

    return(\@files);
}
292
|
|
#
# Copies one FASTA file while rewriting its header lines.
#
# Arguments:
#   $pathfrom       Path of the FASTA file to read.
#   $pathto         Path of the file to write.
#   $action         One of 'strip', 'replace', 'add', 'delete' or 'shuffle'.
#   $x_fixes_array  Array ref with prefixes / suffixes (strip, replace, add).
#   $new_x_fix      New prefix / suffix (replace).
#   $position       Position keyword (replace, add).
#   $ids_to_delete  Hash ref keyed by the ranks of the IDs to drop (delete).
#   $new_id_order   Array ref with ID ranks in the new order (shuffle).
#
# Header lines (starting with '>') are split into pipe separated IDs plus an
# optional description and handed to the sub for the requested action.
# Blank lines are dropped; all other lines are copied unchanged.
# Dies when a file cannot be opened or written; logs a fatal message and
# exits when a header is malformed or cannot be converted.
#
sub _ConvertFastaHeaders {

    $logger->debug('_ConvertFastaHeaders sub');

    my ($pathfrom, $pathto, $action, $x_fixes_array, $new_x_fix, $position, $ids_to_delete, $new_id_order) = @_;

    my $header_count = 0;

    # Three-argument open with lexical handles: the path is never parsed
    # for mode characters and the handles do not leak into other subs.
    open(my $read_fh, '<', $pathfrom) or die "\tcan't open input file $pathfrom: $!";
    open(my $save_fh, '>', $pathto) or die "\tcan't open output file $pathto: $!";

    while (my $line = <$read_fh>) {

        my $new_line;

        if ($line =~ /^>/) {

            #
            # It's a header line.
            #
            $header_count++;
            my $ids_string;
            my $description;
            my $line_end;

            if ($line =~ /^>([^\s]+)\s+(.+)([\n\r\f]+)/) {

                #
                # Header with description
                #
                ($ids_string, $description, $line_end) = ($1, $2, $3);

            } elsif ($line =~ /^>([^\s]+)\s*([\n\r\f]+)/) {

                #
                # Header without description
                #
                ($ids_string, $line_end) = ($1, $2);

            } else {

                $logger->fatal("Malformed header line. Cannot find ID.");
                exit;

            }

            my @ids = split(/\|/, $ids_string);

            if ($action eq 'strip') {
                $new_line = _StripFix($x_fixes_array, $ids_string, $description);
            } elsif ($action eq 'replace') {
                $new_line = _ReplaceFix($x_fixes_array, $new_x_fix, $position, \@ids, $description);
            } elsif ($action eq 'add') {
                $new_line = _AddFix($x_fixes_array, $position, \@ids, $description);
            } elsif ($action eq 'delete') {
                $new_line = _DeleteID($ids_to_delete, \@ids, $description);
            } elsif ($action eq 'shuffle') {
                $new_line = _ShuffleID($new_id_order, \@ids, $description);
            }

            # The action subs return undef when they cannot build a header
            # (an unknown action leaves $new_line undefined as well).
            unless (defined($new_line)) {

                $logger->fatal('Cannot convert header number: ' . $header_count);
                $logger->fatal('Offending header line was: ' . $line);
                exit;

            }

            # Put back the line ending that was captured from the input.
            $new_line .= $line_end;

        } elsif ($line =~ /^[\n\r\f]+$/) {

            # Skip blank line: nothing is written for it.
            next;

        } else {

            #
            # It must be a sequence line.
            #
            $new_line = $line;

        }

        # Save (modified) line.
        print {$save_fh} $new_line or die "\tcan't save output to file $pathto: $!";

    }

    close($read_fh);
    # Buffered write errors only show up when the handle is closed.
    close($save_fh) or die "\tcan't save output to file $pathto: $!";

}
398
|
|
#
# Removes prefixes / suffixes from the IDs of a header.
#
# Arguments:
#   $x_fixes_array  Array ref with the prefixes / suffixes to remove.
#   $ids_string     The pipe separated IDs of the header (without '>').
#   $description    Description of the header or undef.
#
# Every occurrence of each *fix is removed from $ids_string, wherever it
# occurs. The *fixes are matched literally, so characters like '.' or '|'
# have no special meaning.
# Returns the new header line without line ending.
#
sub _StripFix {

    my ($x_fixes_array, $ids_string, $description) = @_;

    foreach my $x_fix (@{$x_fixes_array}) {

        # An empty pattern would make Perl reuse the last successful
        # pattern, so empty or undefined *fixes are ignored.
        next unless (defined($x_fix) && $x_fix ne '');

        # \Q...\E quotes regex metacharacters in the *fix.
        $ids_string =~ s/\Q$x_fix\E//g;

    }

    my $new_line = '>' . $ids_string;
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
419
|
|
#
# Replaces prefixes / suffixes of the IDs of a header with a new one.
#
# Arguments:
#   $x_fixes_array  Array ref with the prefixes / suffixes to replace.
#   $new_x_fix      The new prefix / suffix.
#   $position       'prefix'  : replace a prefix with the new prefix,
#                   'suffix'  : replace a suffix with the new suffix,
#                   'pre2suf' : remove a prefix and append the new suffix,
#                   'suf2pre' : remove a suffix and prepend the new prefix.
#   $ids            Array ref with the IDs of the header.
#   $description    Description of the header or undef.
#
# IDs without any of the specified *fixes are copied unmodified. The *fixes
# are matched literally and at least one character must remain after
# removing a *fix.
# Returns the new header line without line ending.
# Logs a fatal message and exits on an unknown position.
#
sub _ReplaceFix {

    my ($x_fixes_array, $new_x_fix, $position, $ids, $description) = @_;

    # Where the old *fix is removed from ...
    my $strip_prefix = ($position eq 'prefix' || $position eq 'pre2suf');
    my $strip_suffix = ($position eq 'suffix' || $position eq 'suf2pre');
    unless ($strip_prefix || $strip_suffix) {
        $logger->fatal("Illegal or no position $position specified.");
        exit;
    }
    # ... and where the new *fix is added.
    my $add_prefix = ($position eq 'prefix' || $position eq 'suf2pre');

    my @new_ids;

    foreach my $original_id (@{$ids}) {

        my $id    = $original_id;
        my $match = 0;

        # Every *fix is tried in turn on what is left of the ID.
        foreach my $x_fix (@{$x_fixes_array}) {

            if ($strip_prefix) {
                if ($id =~ m/^\Q$x_fix\E(.+)/) {
                    $id    = $1;
                    $match = 1;
                }
            } else {
                if ($id =~ m/(.+)\Q$x_fix\E$/) {
                    $id    = $1;
                    $match = 1;
                }
            }
        }

        if ($match) {
            # Add the new *fix to the stripped ID.
            push(@new_ids, $add_prefix ? $new_x_fix . $id : $id . $new_x_fix);
        } else {
            # Copy the ID unmodified to the result.
            push(@new_ids, $original_id);
        }
    }

    my $new_line = '>' . join('|', @new_ids);
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
497
|
|
#
# Adds a prefix or suffix to each ID of a header.
#
# Arguments:
#   $x_fixes_array  Array ref with one prefix / suffix per ID, in ID order.
#   $position       'prefix' or 'suffix'.
#   $ids            Array ref with the IDs of the header.
#   $description    Description of the header or undef.
#
# Returns the new header line without line ending, or undef (after logging
# a fatal message) when the number of *fixes differs from the number of IDs
# or the position is not 'prefix' or 'suffix'.
#
sub _AddFix {

    my ($x_fixes_array, $position, $ids, $description) = @_;

    my $id_count    = scalar(@{$ids});
    my $x_fix_count = scalar(@{$x_fixes_array});

    # The n-th *fix belongs to the n-th ID, so the counts must be equal.
    unless ($id_count == $x_fix_count) {
        $logger->fatal('Amount of pre- or suffixes specified (' . $x_fix_count . ') does not match with amount if IDs found (' . $id_count . ').');
        return(undef);
    }

    # Any other position would silently produce a header without IDs.
    unless ($position eq 'prefix' || $position eq 'suffix') {
        $logger->fatal("Illegal or no position $position specified.");
        return(undef);
    }

    my @new_ids;

    for my $count (0 .. $#{$ids}) {

        if ($position eq 'prefix') {
            push(@new_ids, ${$x_fixes_array}[$count] . ${$ids}[$count]);
        } else {
            push(@new_ids, ${$ids}[$count] . ${$x_fixes_array}[$count]);
        }
    }

    my $new_line = '>' . join('|', @new_ids);
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
532
|
|
#
# Removes IDs from a header based on their rank.
#
# Arguments:
#   $ids_to_delete  Hash ref; an ID is dropped when its 1-based rank is a
#                   key with a defined value in this hash.
#   $ids            Array ref with the IDs of the header.
#   $description    Description of the header or undef.
#
# Returns the new header line without line ending, or undef (after logging
# a fatal message) when no ID would be left in the header.
#
sub _DeleteID {

    my ($ids_to_delete, $ids, $description) = @_;

    my @kept_ids;

    for my $offset (0 .. $#{$ids}) {

        # Ranks specified by the user start at 1, array offsets at 0.
        my $index = $offset + 1;

        if (defined(${$ids_to_delete}{$index})) {

            # Skip (drop) this ID.
            $logger->debug('Dropping ' . ${$ids}[$offset] . ' as it is ID number ' . $index . '.');

        } else {

            push(@kept_ids, ${$ids}[$offset]);

        }
    }

    # A header without any ID cannot be parsed as a FASTA header again,
    # so refuse to create one instead of writing '>' plus description.
    unless (scalar(@kept_ids) > 0) {
        $logger->fatal('All IDs of the header would be deleted; at least one ID must remain.');
        return(undef);
    }

    my $new_line = '>' . join('|', @kept_ids);
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
564
|
|
#
# Changes the order of the IDs of a header.
#
# Arguments:
#   $new_id_order  Array ref with the 1-based ranks of the IDs in the order
#                  in which they must appear in the new header.
#   $ids           Array ref with the IDs of the header.
#   $description   Description of the header or undef.
#
# Returns the new header line without line ending, or undef (after logging
# a fatal message) when the number of ranks differs from the number of IDs
# or a rank does not refer to an existing ID. Duplicate ranks are not
# rejected.
#
sub _ShuffleID {

    my ($new_id_order, $ids, $description) = @_;

    my $id_count                = scalar(@{$ids});
    my $new_id_order_item_count = scalar(@{$new_id_order});

    unless ($id_count == $new_id_order_item_count) {
        $logger->fatal('Amount of IDs specified to re-order (' . $new_id_order_item_count . ') does not match with amount if IDs found (' . $id_count . ').');
        return(undef);
    }

    my $new_line = '>';

    foreach my $rank (@{$new_id_order}) {

        # A rank outside 1 .. number of IDs would insert an empty ID.
        unless ($rank >= 1 && $rank <= $id_count) {
            $logger->fatal('ID rank ' . $rank . ' does not exist; the header contains ' . $id_count . ' IDs.');
            return(undef);
        }

        # Ranks specified by the user start at 1, array offsets at 0.
        my $offset = $rank - 1;
        $logger->debug('ID rank ' . $rank . ' = ' . ${$ids}[$offset] . '.');
        $new_line .= ${$ids}[$offset] . '|';
        $logger->debug('New header line now contains ' . $new_line . '.');

    }

    # Remove the pipe that was appended after the last ID.
    $new_line =~ s/\|$//;
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
597
|
|
#
# Prints the usage information to STDOUT and ends the script.
# Takes no arguments and never returns: it always calls exit.
#
sub _Usage {

    print "\n";
    print "ConvertFastaHeaders.pl - Converts sequence headers of FASTA files.\n";
    print "\n";
    print "Usage:\n";
    print "\n";
    print " ConvertFastaHeaders.pl options\n";
    print "\n";
    print "Available options are:\n";
    print "\n";
    print " -i [dir/file] Input can be a single FASTA file or a directory containing FASTA files.\n";
    print " -e [ext] File name extension for the FASTA files in case the input is a directory. (default = fa)\n";
    print " -o [dir/file] Output file or directory where the result(s) will be saved.\n";
    print " -a [action] Action must be one of 'add', 'strip', 'replace', 'delete' or 'shuffle'.\n";
    print " The actions 'delete' and 'shuffle' operate on complete sequence IDs with or without (database namespace) prefixes or suffixes.\n";
    print " The actions 'add', 'strip' and 'replace' operate on sequence ID prefixes or suffixes.\n";
    print " Note in case *fixes are added the order of the *fixes is important! (See below for examples.)\n";
    print " -p [position] Positon must be a comma separated list of numbers in case the action is 'delete' or 'shuffle'.\n";
    print " Position must be one of 'prefix' or 'suffix' when the action is 'add' or 'strip'.\n";
    print " In case the action is 'replace' the position can also be one of pre2suf or suf2pre \n";
    print " to replace a prefix with a suffix or vice versa.\n";
    print " -f '[*fix1 *fix2 *fixN]' Space separated list of prefixes or suffixes, which will be replaced in, added to or removed from pipe separated identifiers.\n";
    print " Note that in case of database namespace prefixes you must specify both the database name space and \n";
    print " the character to separate the namespace from the accession number as the prefix. (See below for examples.) \n";
    print " -n '[*fix]' A single new prefix or suffix to replace the *fixes specified with -f.\n";
    print " (Only required in case the action is 'replace'.)\n";
    # The script falls back to WARN when -l is missing or not recognised,
    # so WARN is the level that is marked as the default here.
    print " -l [LEVEL] Log4perl log level. One of: ALL, TRACE, DEBUG, INFO, WARN (default), ERROR, FATAL or OFF.\n";
    print "\n";
    print "Examples:\n";
    print "\n";
    print " Adding prefixes\n";
    print " In this case the order of the *fixes specified with -f is important!\n";
    print " With -a add -p prefix -f 'UniProtAcc: UniProtID:', this header:\n";
    print " >P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " will be converted into:\n";
    print " >UniProtAcc:P32234|UniProtID:128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " Stripping prefixes\n";
    print " In this case the order of the *fixes specified with -f is not relevant.\n";
    print " With both -a strip -p prefix -f 'UniProtAcc: UniProtID:' or \n";
    print " with -a strip -p prefix -f 'UniProtID: UniProtAcc:', this header:\n";
    print " >UniProtAcc:P32234|UniProtID:128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " will be converted into:\n";
    print " >P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " Replacing prefixes with a suffix\n";
    print " In this case the order of the *fixes specified with -f is not relevant.\n";
    print " With -a replace -p pre2suf -f 'REV_' -n '_REV', this header:\n";
    print " >REV_P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " will be converted into:\n";
    print " >P32234_REV|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " Deleting sequence identifiers\n";
    print " Supply a comma separated list of numbers for the ranks of the identifiers / accession numbers you want to remove.\n";
    print " Multiple identifiers must be separated with a pipe symbol.\n";
    print " With -a delete -p '1,3', this header:\n";
    print " >UniProtID:128UP_DROME|UniProtAcc:P32234|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " will be converted into:\n";
    print " >UniProtAcc:P32234 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " Changing the order of sequence identifiers\n";
    print " Supply a comma separated list of numbers for the new order of all the identifiers / accession numbers in a header.\n";
    print " Multiple identifiers must be separated with a pipe symbol.\n";
    print " Hence if your headers contain 4 pipe separated IDs and you only want to swap the order of the first and the second, \n";
    print " you will still need to specify the new (unchanged) order for number 3 and 4 too.\n";
    print " With -a shuffle -p '2,1,3', this header:\n";
    print " >UniProtID:128UP_DROME|UniProtAcc:P32234|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " will be converted into:\n";
    print " >UniProtAcc:P32234|UniProtID:128UP_DROME|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n";
    print " Specifying only *2,1* as the New order for the IDs will not work, because this header contains 3 IDs, \n";
    print " so you'll have to include the (new) position for the third one as well.\n";
    print "\n";
    exit;

}