Mercurial > repos > padge > trimal
comparison trimal_repo/scripts/remove_shorter_sequences.py @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b15a3147e604 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 # | |
4 # 'remove_shorter_sequences.py' | |
5 # | |
6 # Script implemented to explore future functionalities of trimAl. The script | |
7 # analyzes the length of each sequence and removes those shorter than a given | |
8 # length set by the user | |
9 # | |
10 # [2015] S. Capella-Gutierrez - scapella@crg.es | |
11 # | |
12 # this script is free software: you can redistribute it and/or modify it under | |
13 # the terms of the GNU General Public License as published by the Free | |
14 # Software Foundation, the last available version. | |
15 # | |
16 # this script is distributed in the hope that it will be useful, but WITHOUT | |
17 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
19 # more details on <http://www.gnu.org/licenses/> | |
20 # | |
21 from Bio import AlignIO | |
22 import argparse | |
23 import sys | |
24 import os | |
25 | |
def count_residues(sequence, gap_symbol):
    """Return the number of characters in *sequence* that are not gaps.

    Each character is compared individually against *gap_symbol*, so a
    gap symbol that is not exactly one character long never matches and
    every position is counted as a residue.
    """
    return sum(1 for char in sequence if char != gap_symbol)


def classify_sequences(records, min_len, gap_symbol="-", keep_header=False):
    """Yield ``(sequence_id, sequence, residues, keep)`` for every record.

    *records* is any iterable of objects exposing ``id``, ``description``
    and ``seq`` attributes (as the records of the alignment returned by
    ``AlignIO.read`` are used below). ``sequence_id`` is the record
    description when *keep_header* is true and the record id otherwise;
    ``residues`` is the non-gap character count and ``keep`` tells whether
    that count reaches *min_len*.
    """
    for record in records:
        sequence_id = record.description if keep_header else record.id
        sequence = str(record.seq)
        residues = count_residues(sequence, gap_symbol)
        yield sequence_id, sequence, residues, residues >= min_len


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument("-i", "--in", dest="inFile", required=True, type=str,
                        help="Input alignment")

    parser.add_argument("-o", "--out", dest="outFile", default=None, type=str,
                        help="Set output file. It will be generated into "
                             "FASTA format")

    parser.add_argument("-m", "--min", dest="minLen", default=1, type=int,
                        help="Set a minimum sequence length to keep it in "
                             "the output alignment")

    parser.add_argument("-f", "--format", dest="inFormat", default="fasta",
                        type=str,
                        choices=["clustal", "fasta-m10", "fasta",
                                 "phylip-relaxed", "phylip-sequential",
                                 "phylip", "nexus"],
                        help="Set input alignment format")

    parser.add_argument("-g", "--gap_symbol", dest="gapSymbol", default='-',
                        type=str,
                        help="Define the gap symbol used in the input "
                             "alignment")

    parser.add_argument("--keep_header", dest="keepHeader", default=False,
                        action="store_true",
                        help="Keep original alignment sequence IDs "
                             "independently of blank spaces on it")

    parser.add_argument("-v", "--verbose", dest="verbose", default=False,
                        action="store_true", help="Activate verbosity")

    args = parser.parse_args()

    if not os.path.isfile(args.inFile):
        sys.exit(("ERROR: Check input alignment file '%s'") % (args.inFile))

    # Parse the whole alignment BEFORE opening the output file: opening with
    # "w" truncates, so a parse failure (or --in and --out naming the same
    # file) must not destroy anything.
    alignment = AlignIO.read(args.inFile, format=args.inFormat)

    ofile = open(args.outFile, "w") if args.outFile else sys.stdout
    try:
        classified = classify_sequences(alignment, args.minLen,
                                        args.gapSymbol, args.keepHeader)
        for sequence_id, sequence, residues, keep in classified:
            if keep:
                # .write() instead of the Python 2-only "print >>" statement,
                # so the script runs under both Python 2 and Python 3.
                ofile.write((">%s\n%s\n") % (sequence_id, sequence))
            elif args.verbose:
                msg = ("INFO: Sequence '%s' has been removed. Shorter ") \
                    % (sequence_id)
                msg += ("(%d) than min. sequence length (%d)") \
                    % (residues, args.minLen)
                sys.stderr.write(msg + "\n")
                sys.stderr.flush()
    finally:
        # Only close handles this script opened; leave sys.stdout usable.
        if ofile is sys.stdout:
            ofile.flush()
        else:
            ofile.close()