Mercurial > repos > galaxyp > nbic_fasta
comparison ConvertFastaHeaders.pl @ 0:163892325845 draft default tip
Initial commit.
author | galaxyp |
---|---|
date | Fri, 10 May 2013 17:15:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:163892325845 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 # | |
4 # convertFastaHeaders.pl | |
5 # | |
6 # $Id: ConvertFastaHeaders.pl 44 2010-10-18 12:58:41Z pieter.neerincx@gmail.com $ | |
7 # $URL: https://trac.nbic.nl/svn/galaxytools/trunk/tools/general/FastaTools/ConvertFastaHeaders.pl $ | |
8 # $LastChangedDate: 2010-10-18 07:58:41 -0500 (Mon, 18 Oct 2010) $ | |
9 # $LastChangedRevision: 44 $ | |
10 # $LastChangedBy: pieter.neerincx@gmail.com $ | |
11 # | |
12 # Converts sequence header of FASTA files (in various customisable ways). | |
13 # | |
14 | |
15 # | |
16 # Initialize environment | |
17 # | |
18 use strict; | |
19 use Getopt::Std; | |
20 use Log::Log4perl qw(:easy); | |
21 | |
#
# Map of the log level names accepted on the command line (-l) to the
# level constants imported by Log::Log4perl's ':easy' tag.
#
my %log_levels = (
    'ALL'   => $ALL,
    'TRACE' => $TRACE,
    'DEBUG' => $DEBUG,
    'INFO'  => $INFO,
    'WARN'  => $WARN,
    'ERROR' => $ERROR,
    'FATAL' => $FATAL,
    'OFF'   => $OFF,
);

#
# Get options.
#
my %opts;
Getopt::Std::getopts('i:o:l:e:f:n:a:p:', \%opts);
my $input     = $opts{'i'};
my $output    = $opts{'o'};
my $log_level = $opts{'l'};
my $extension = $opts{'e'};
# split(' ', ...) splits on runs of whitespace AND skips leading whitespace,
# so a value like ' REV_' does not produce an empty first *fix.
# A missing -f results in an empty list, which is reported during validation.
my @x_fixes_array = (defined($opts{'f'}) ? split(' ', $opts{'f'}) : ());
my $new_x_fix = $opts{'n'};
my $action    = $opts{'a'};
my $position  = $opts{'p'};
# Filled during validation of -p for the 'delete' and 'shuffle' actions.
my %ids_to_delete;
my @new_id_order;

#
# Configure logging.
#
# Provides default if user did not specify log level:
$log_level = (defined($log_level) ? $log_level : 'WARN');
# Translate the level name into the Log4perl constant;
# reset log level to default if user specified illegal log level.
$log_level = (defined($log_levels{$log_level}) ? $log_levels{$log_level} : $log_levels{'WARN'});
#Log::Log4perl->init('log4perl.properties');
Log::Log4perl->easy_init(
    #{ level  => $log_level,
    #  file   => ">>ConvertFastaHeaders.log",
    #  layout => '%F{1}-%L-%M: %m%n' },
    { level  => $log_level,
      file   => "STDERR",
      layout => '%d L:%L %p> %m%n' },
);
my $logger = Log::Log4perl::get_logger();
66 | |
67 # | |
68 # Start the conversion process. | |
69 # | |
$logger->info("Starting...");

#
# Check user input.
# Every validation failure logs a fatal message and ends the script,
# either via _Usage() (which prints the help text and exits) or via exit.
#

# Provides default if user did not specify action:
$action = (defined($action) ? $action : 'add');

# Check for valid action and action specific options.
if ($action eq 'add' || $action eq 'strip' || $action eq 'replace') {

    # These actions operate on *fixes, so at least one must be given with -f.
    unless (scalar(@x_fixes_array) > 0) {
        $logger->fatal('No prefixes or suffixes specified.');
        _Usage();
    }

    if ($action eq 'replace') {
        unless (defined($new_x_fix) && $new_x_fix ne '') {
            $logger->fatal('No new prefix or suffix specified to replace the existing ones.');
            _Usage();
        }
    }

    # Provides default if user did not specify position:
    $position = (defined($position) ? $position : 'prefix');
    # Check for valid position.
    if ($action eq 'add' || $action eq 'strip') {
        unless ($position eq 'prefix' || $position eq 'suffix') {
            $logger->fatal('Illegal position specified. Must be \'prefix\' or \'suffix\'.');
            _Usage();
        }
    } elsif ($action eq 'replace') {
        unless ($position eq 'prefix' || $position eq 'suffix' || $position eq 'pre2suf' || $position eq 'suf2pre') {
            $logger->fatal('Illegal position specified. Must be \'prefix\', \'suffix\', \'pre2suf\' or \'suf2pre\'.');
            _Usage();
        }
    }

} elsif ($action eq 'delete' || $action eq 'shuffle') {

    # For these actions -p carries a comma separated list of 1-based ID ranks.
    unless (defined($position) && $position ne '') {
        $logger->fatal('No position specified.');
        _Usage();
    }

    my @id_indices = split(/,/, $position);

    # Check if the value is a number.
    foreach my $index_number (@id_indices) {

        unless ($index_number =~ m/^[1-9][0-9]*$/) {

            $logger->fatal('Illegal character in position list. Must be a single positive integer or comma separated list of positive integers.');
            _Usage();

        }

        if ($action eq 'delete') {

            # Ranks to drop are stored as hash keys for direct lookup.
            $ids_to_delete{$index_number} = 'del';

        } elsif ($action eq 'shuffle') {

            # The order in which ranks are listed is the new ID order.
            push(@new_id_order, $index_number);

        }
    }

} else {
    $logger->fatal('Illegal action specified. Must be add, strip, replace, delete or shuffle.');
    _Usage();
}

# Provide default if user did not specify fasta filename extension:
$extension = (defined($extension) ? $extension : 'fa');

# Input and output cannot be missing or empty.
unless (defined($input) && $input ne '' && defined($output) && $output ne '') {
    $logger->fatal('No input and/or output specified.');
    _Usage();
}
if ($input eq $output) {
    $logger->fatal("Output dir/file is the same as the input dir/file. Please choose a different one.");
    exit;
}
161 | |
162 # | |
163 # Check if input is a single file or a directory. | |
164 # | |
#
# The input must exist and be readable, whether it is a file or a directory.
#
unless (-e $input && -r $input) {

    $logger->fatal("Input $input does not exist or is not readable: $!");
    exit;

}

if (-f $input) {

    #
    # We've got an input file.
    # Strip any leading directories so the log messages show the bare file name.
    #
    my $file;
    if ($input =~ m/(.+\/)([^\/]+)$/) {
        $file = $2;
    } else {
        $file = $input;
    }

    $logger->info('Parsing ' . $file . "...\n");

    _ConvertFastaHeaders($input, $output, $action, \@x_fixes_array, $new_x_fix, $position, \%ids_to_delete, \@new_id_order);

    $logger->info('Converted ' . $file);

} else {

    #
    # We've got an input directory.
    # Assume the output is also a directory.
    # Append trailing path separators if they were missing.
    #
    my $indir;
    my $outdir;
    unless ($input =~ m/\/$/) {
        $input = $input . '/';
    }
    unless ($output =~ m/\/$/) {
        $output = $output . '/';
    }
    #
    # Make sure the input dir is a directory.
    #
    unless (-d $input) {
        $logger->fatal("Input $input is not a file nor a directory: $!");
        exit;
    } else {
        $indir  = $input;
        $outdir = $output;
    }

    #
    # Get all FASTA files from the input dir.
    #
    my $files = _GetFiles($indir, $outdir, $extension);

    #
    # Create the output directory if it did not exist yet.
    #
    if (-e $outdir && -d $outdir) {
        unless (-w $outdir) {
            $logger->fatal("Cannot write to output directory $outdir. Check for permission errors, read-only file systems, etc.");
            exit;
        }
    } else {
        $logger->info("Creating output directory $outdir...");
        # mkdir does not die on failure; it returns false and sets $!.
        unless (mkdir($outdir)) {
            $logger->fatal("Cannot create output directory $outdir: $!");
            exit;
        }
    }

    #
    # Convert FASTA files.
    #
    foreach my $file (@{$files}) {

        $logger->info('Parsing ' . $file . "...\n");

        # Each file is converted individually: same file name, different directory.
        my $pathfrom = $indir . $file;
        my $pathto   = $outdir . $file;

        _ConvertFastaHeaders($pathfrom, $pathto, $action, \@x_fixes_array, $new_x_fix, $position, \%ids_to_delete, \@new_id_order);

        $logger->info('Converted ' . $file);

    }
}

$logger->info('Finished!');
257 | |
258 # | |
259 ## | |
260 ### Internal subs. | |
261 ## | |
262 # | |
263 | |
#
# _GetFiles($indir, $outdir, $extension)
#
# Lists the entries of $indir whose name ends in ".$extension" (case
# insensitive). Hidden entries (leading dot) are skipped, as is an entry
# named exactly like the last path component of $outdir, so an output
# directory located inside the input directory is not picked up as input.
#
# Returns a reference to an array of entry names (without directory part).
# Logs a fatal message and exits when $indir cannot be opened.
#
sub _GetFiles {

    my ($indir, $outdir, $extension) = @_;

    #
    # Get the last path component of the outdir (ignoring trailing slashes).
    # Only use the capture when the match succeeded: a stale or undefined $1
    # would otherwise end up in the filter below.
    #
    my $outdir_rel;
    if (defined($outdir) && $outdir =~ m/([^\/]+)\/*\z/) {
        $outdir_rel = $1;
    }

    # The extension is literal text, not a regular expression.
    my $extension_pattern = quotemeta($extension);

    #
    # Get and filter all entries from the input dir.
    # opendir does not die on failure; it returns false and sets $!.
    #
    my $indir_handle;
    unless (opendir($indir_handle, $indir)) {
        $logger->fatal("Cannot read files from input directory $indir: $!");
        exit;
    }
    my @files = grep {
        /.+\.$extension_pattern\z/i
            && !/^\./
            && !(defined($outdir_rel) && $_ eq $outdir_rel)
    } readdir($indir_handle);
    closedir($indir_handle);

    return(\@files);
}
292 | |
#
# _ConvertFastaHeaders($pathfrom, $pathto, $action, $x_fixes_array, $new_x_fix,
#                      $position, $ids_to_delete, $new_id_order)
#
# Copies the FASTA file $pathfrom to $pathto line by line. Header lines
# (starting with '>') are rewritten according to $action ('strip', 'replace',
# 'add', 'delete' or 'shuffle') by the matching helper sub; sequence lines are
# copied unchanged and blank lines are dropped.
#
# Dies when a file cannot be opened or written. Logs a fatal message and exits
# when a header has no ID or cannot be converted.
#
sub _ConvertFastaHeaders {

    $logger->debug('_ConvertFastaHeaders sub');

    my ($pathfrom, $pathto, $action, $x_fixes_array, $new_x_fix, $position, $ids_to_delete, $new_id_order) = @_;

    my $header_count = 0;

    # Three-argument open with lexical handles: the file names are used
    # literally, whatever characters they contain.
    open(my $read_handle, '<', $pathfrom) or die "\tcan't open input file $pathfrom: $!";
    open(my $save_handle, '>', $pathto) or die "\tcan't open output file $pathto: $!";

    while (my $line = <$read_handle>) {

        # Skip blank line.
        next if ($line =~ /^[\n\r\f]+$/);

        my $new_line;

        if ($line =~ /^>/) {

            #
            # It's a header line.
            #
            $header_count++;
            my $ids_string;
            my $description;
            my $line_end;

            if ($line =~ /^>([^\s]+)\s+(.+)([\n\r\f]+)/i) {

                #
                # Header with description
                #
                $ids_string  = $1;
                $description = $2;
                $line_end    = $3;

            } elsif ($line =~ /^>([^\s]+)\s*([\n\r\f]+)/i) {

                #
                # Header without description
                #
                $ids_string = $1;
                $line_end   = $2;

            } else {

                $logger->fatal("Malformed header line. Cannot find ID.");
                exit;

            }

            # Multiple IDs in one header are separated by pipe symbols.
            my @ids = split(/\|/, $ids_string);

            if ($action eq 'strip') {

                $new_line = _StripFix($x_fixes_array, $ids_string, $description);

            } elsif ($action eq 'replace') {

                $new_line = _ReplaceFix($x_fixes_array, $new_x_fix, $position, \@ids, $description);

            } elsif ($action eq 'add') {

                $new_line = _AddFix($x_fixes_array, $position, \@ids, $description);

            } elsif ($action eq 'delete') {

                $new_line = _DeleteID($ids_to_delete, \@ids, $description);

            } elsif ($action eq 'shuffle') {

                $new_line = _ShuffleID($new_id_order, \@ids, $description);

            }

            # The helpers return undef when a header cannot be converted.
            unless (defined($new_line)) {

                $logger->fatal('Cannot convert header number: ' . $header_count);
                $logger->fatal('Offending header line was: ' . $line);
                exit;

            }

            # Re-attach the line terminator captured from the original header.
            $new_line .= $line_end;

        } else {

            #
            # It must be a sequence line.
            #
            $new_line = $line;

        }

        # Save (modified) line.
        print {$save_handle} $new_line or die "\tcan't save output to file $pathto: $!";

    }

    close($read_handle);
    # Buffered write errors only surface when the handle is closed.
    close($save_handle) or die "\tcan't save output to file $pathto: $!";

}
398 | |
#
# _StripFix($x_fixes_array, $ids_string, $description)
#
# Removes every occurrence of each *fix listed in @{$x_fixes_array} from the
# (pipe separated) $ids_string. The *fixes are treated as literal text.
#
# Returns the new header line (without line terminator): '>' followed by the
# stripped IDs and, if $description is defined, a space and the description.
#
sub _StripFix {

    my ($x_fixes_array, $ids_string, $description) = @_;
    my $new_line;

    foreach my $x_fix (@{$x_fixes_array}) {

        # An empty pattern would make Perl reuse the last successful regex.
        next unless (defined($x_fix) && $x_fix ne '');

        # \Q...\E: characters like '.', '|' or '(' in a *fix are literals.
        $ids_string =~ s/\Q$x_fix\E//g;

    }

    if (defined($description)) {
        $new_line = '>' . $ids_string . ' ' . $description;
    } else {
        $new_line = '>' . $ids_string;
    }

    return($new_line);

}
419 | |
#
# _ReplaceFix($x_fixes_array, $new_x_fix, $position, $ids, $description)
#
# For every ID in @{$ids}: strips any of the *fixes in @{$x_fixes_array} and,
# if at least one was found, attaches $new_x_fix instead. IDs without a
# matching *fix are copied unmodified. The *fixes are treated as literal text.
#
# $position selects where to look and where to attach:
#   'prefix'  - strip prefix, add new prefix
#   'suffix'  - strip suffix, add new suffix
#   'pre2suf' - strip prefix, add new suffix
#   'suf2pre' - strip suffix, add new prefix
#
# Returns the new header line (without line terminator). Logs a fatal message
# and exits on any other $position.
#
sub _ReplaceFix {

    my ($x_fixes_array, $new_x_fix, $position, $ids, $description) = @_;
    my $new_line = '>';

    my $strip_prefix = ($position eq 'prefix' || $position eq 'pre2suf');
    my $strip_suffix = ($position eq 'suffix' || $position eq 'suf2pre');

    for my $count (0 .. $#{$ids}) {

        my $id = ${$ids}[$count];
        my $stripped_id;
        my $match = 0;

        if ($strip_prefix) {

            foreach my $x_fix (@{$x_fixes_array}) {

                # \Q...\E: the *fix is literal text, not a regular expression.
                if ($id =~ m/^\Q$x_fix\E(.+)/) {

                    # Continue with the stripped ID, so several listed
                    # *fixes stacked on one ID are all removed.
                    $stripped_id = $1;
                    $id          = $stripped_id;
                    $match       = 1;

                }
            }

        } elsif ($strip_suffix) {

            foreach my $x_fix (@{$x_fixes_array}) {

                if ($id =~ m/(.+)\Q$x_fix\E$/) {

                    $stripped_id = $1;
                    $id          = $stripped_id;
                    $match       = 1;

                }
            }

        } else {

            $logger->fatal("Illegal or no position $position specified.");
            exit;

        }

        if ($match) {

            #
            # Append the new *fix.
            #
            if ($position eq 'prefix' || $position eq 'suf2pre') {

                $new_line .= $new_x_fix . $stripped_id . '|';

            } elsif ($position eq 'pre2suf' || $position eq 'suffix') {

                $new_line .= $stripped_id . $new_x_fix . '|';

            }

        } else {

            #
            # Copy the ID unmodified to the result.
            #
            $new_line .= ${$ids}[$count] . '|';

        }
    }

    # Remove the pipe that was appended after the last ID.
    $new_line =~ s/\|$//;
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
497 | |
#
# _AddFix($x_fixes_array, $position, $ids, $description)
#
# Adds the n-th *fix of @{$x_fixes_array} to the n-th ID of @{$ids}, either in
# front of it ($position 'prefix') or behind it ($position 'suffix').
#
# Returns the new header line (without line terminator), or undef when the
# number of *fixes differs from the number of IDs or when $position is
# neither 'prefix' nor 'suffix'.
#
sub _AddFix {

    my ($x_fixes_array, $position, $ids, $description) = @_;
    my $new_line = '>';

    my $id_count    = scalar(@{$ids});
    my $x_fix_count = scalar(@{$x_fixes_array});

    unless ($id_count == $x_fix_count) {
        $logger->fatal('Amount of pre- or suffixes specified (' . $x_fix_count . ') does not match with amount of IDs found (' . $id_count . ').');
        return;
    }

    # Any other position would silently drop all IDs from the header;
    # signal failure instead and let the caller report the offending header.
    unless (defined($position) && ($position eq 'prefix' || $position eq 'suffix')) {
        return;
    }

    for my $count (0 .. $#{$ids}) {

        if ($position eq 'prefix') {

            $new_line .= ${$x_fixes_array}[$count] . ${$ids}[$count] . '|';

        } else {

            $new_line .= ${$ids}[$count] . ${$x_fixes_array}[$count] . '|';

        }
    }

    # Remove the pipe that was appended after the last ID.
    $new_line =~ s/\|$//;
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
532 | |
#
# _DeleteID($ids_to_delete, $ids, $description)
#
# Builds a header line from the IDs in @{$ids}, leaving out every ID whose
# 1-based rank has a defined value in %{$ids_to_delete}.
#
# Returns '>' followed by the remaining IDs joined with pipes and, if
# $description is defined, a space and the description.
#
sub _DeleteID {

    my ($ids_to_delete, $ids, $description) = @_;

    my @kept_ids;
    my $rank = 0;

    foreach my $id (@{$ids}) {

        # Ranks are 1-based, array offsets are 0-based.
        $rank++;

        if (defined($ids_to_delete->{$rank})) {
            # Skip (drop) this ID.
            $logger->debug('Dropping ' . $id . ' as it is ID number ' . $rank . '.');
            next;
        }

        push(@kept_ids, $id);

    }

    my $new_line = '>' . join('|', @kept_ids);
    $new_line .= ' ' . $description if (defined($description));

    return($new_line);

}
564 | |
#
# _ShuffleID($new_id_order, $ids, $description)
#
# Re-orders the IDs in @{$ids}. @{$new_id_order} lists, for every position in
# the new header, the 1-based rank the ID had in the original header.
#
# Returns the new header line (without line terminator), or undef when the
# number of ranks differs from the number of IDs or when a rank does not
# refer to an existing ID.
#
sub _ShuffleID {

    my ($new_id_order, $ids, $description) = @_;

    my $id_count                = scalar(@{$ids});
    my $new_id_order_item_count = scalar(@{$new_id_order});

    unless ($id_count == $new_id_order_item_count) {
        $logger->fatal('Amount of IDs specified to re-order (' . $new_id_order_item_count . ') does not match with amount of IDs found (' . $id_count . ').');
        return;
    }

    my $new_line = '>';

    foreach my $rank (@{$new_id_order}) {

        # A rank beyond the last ID would insert an empty ID and rank 0 would
        # wrap around to the last ID; treat both as a conversion failure.
        if ($rank < 1 || $rank > $id_count) {
            $logger->fatal('ID rank ' . $rank . ' is out of range: header contains ' . $id_count . ' ID(s).');
            return;
        }

        my $offset = $rank - 1;
        $logger->debug('ID rank ' . $rank . ' = ' . ${$ids}[$offset] . '.');
        $new_line .= ${$ids}[$offset] . '|';
        $logger->debug('New header line now contains ' . $new_line . '.');

    }

    # Remove the pipe that was appended after the last ID.
    $new_line =~ s/\|$//;
    if (defined($description)) {
        $new_line .= ' ' . $description;
    }

    return($new_line);

}
597 | |
#
# _Usage()
#
# Prints the help text (options and worked examples) to STDOUT and ends the
# script. Does not return.
#
sub _Usage {

    print
        "\n",
        "ConvertFastaHeaders.pl - Converts sequence headers of FASTA files.\n",
        "\n",
        "Usage:\n",
        "\n",
        " ConvertFastaHeaders.pl options\n",
        "\n",
        "Available options are:\n",
        "\n",
        " -i [dir/file] Input can be a single FASTA file or a directory containing FASTA files.\n",
        " -e [ext] File name extension for the FASTA files in case the input is a directory. (default = fa)\n",
        " -o [dir/file] Output file or directory where the result(s) will be saved.\n",
        " -a [action] Action must be one of 'add', 'strip', 'replace', 'delete' or 'shuffle'.\n",
        " The actions 'delete' and 'shuffle' operate on complete sequence IDs with or without (database namespace) prefixes or suffixes.\n",
        " The actions 'add', 'strip' and 'replace' operate on sequence ID prefixes or suffixes.\n",
        " Note in case *fixes are added the order of the *fixes is important! (See below for examples.)\n",
        " -p [position] Position must be a comma separated list of numbers in case the action is 'delete' or 'shuffle'.\n",
        " Position must be one of 'prefix' or 'suffix' when the action is 'add' or 'strip'.\n",
        " In case the action is 'replace' the position can also be one of pre2suf or suf2pre \n",
        " to replace a prefix with a suffix or vice versa.\n",
        " -f '[*fix1 *fix2 *fixN]' Space separated list of prefixes or suffixes, which will be replaced in, added to or removed from pipe separated identifiers.\n",
        " Note that in case of database namespace prefixes you must specify both the database name space and \n",
        " the character to separate the namespace from the accession number as the prefix. (See below for examples.) \n",
        " -n '[*fix]' A single new prefix or suffix to replace the *fixes specified with -f.\n",
        " (Only required in case the action is 'replace'.)\n",
        # The script falls back to WARN when -l is missing or not recognised.
        " -l [LEVEL] Log4perl log level. One of: ALL, TRACE, DEBUG, INFO, WARN (default), ERROR, FATAL or OFF.\n",
        "\n",
        "Examples:\n",
        "\n",
        " Adding prefixes\n",
        " In this case the order of the *fixes specified with -f is important!\n",
        " With -a add -p prefix -f 'UniProtAcc: UniProtID:', this header:\n",
        " >P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " will be converted into:\n",
        " >UniProtAcc:P32234|UniProtID:128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " Stripping prefixes\n",
        " In this case the order of the *fixes specified with -f is not relevant.\n",
        " With both -a strip -p prefix -f 'UniProtAcc: UniProtID:' or \n",
        " with -a strip -p prefix -f 'UniProtID: UniProtAcc:', this header:\n",
        " >UniProtAcc:P32234|UniProtID:128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " will be converted into:\n",
        " >P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " Replacing prefixes with a suffix\n",
        " In this case the order of the *fixes specified with -f is not relevant.\n",
        " With -a replace -p pre2suf -f 'REV_' -n '_REV', this header:\n",
        " >REV_P32234|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " will be converted into:\n",
        " >P32234_REV|128UP_DROME GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " Deleting sequence identifiers\n",
        " Supply a comma separated list of numbers for the ranks of the identifiers / accession numbers you want to remove.\n",
        " Multiple identifiers must be separated with a pipe symbol.\n",
        " With -a delete -p '1,3', this header:\n",
        " >UniProtID:128UP_DROME|UniProtAcc:P32234|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " will be converted into:\n",
        " >UniProtAcc:P32234 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " Changing the order of sequence identifiers\n",
        " Supply a comma separated list of numbers for the new order of all the identifiers / accession numbers in a header.\n",
        " Multiple identifiers must be separated with a pipe symbol.\n",
        " Hence if your headers contain 4 pipe separated IDs and you only want to swap the order of the first and the second, \n",
        " you will still need to specify the new (unchanged) order for number 3 and 4 too.\n",
        " With -a shuffle -p '2,1,3', this header:\n",
        " >UniProtID:128UP_DROME|UniProtAcc:P32234|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " will be converted into:\n",
        " >UniProtAcc:P32234|UniProtID:128UP_DROME|EMBL:AY069810 GTP-binding protein 128up - Drosophila melanogaster (Fruit fly)\n",
        " Specifying only *2,1* as the New order for the IDs will not work, because this header contains 3 IDs, \n",
        " so you'll have to include the (new) position for the third one as well.\n",
        "\n";
    exit;

}