comparison trimal_repo/scripts/remove_shorter_sequences.py @ 0:b15a3147e604 draft

"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 17:10:43 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b15a3147e604
1 #!/usr/bin/python
2
3 #
4 # 'remove_shorter_sequences.py'
5 #
6 # Script implemented to explore future functionalities of trimAl. The script
7 # analyzes the length of each sequence and removes those shorter than a given
8 # length set by the user
9 #
10 # [2015] S. Capella-Gutierrez - scapella@crg.es
11 #
12 # this script is free software: you can redistribute it and/or modify it under
13 # the terms of the GNU General Public License as published by the Free
14 # Software Foundation, the last available version.
15 #
16 # this script is distributed in the hope that it will be useful, but WITHOUT
17 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 # more details on <http://www.gnu.org/licenses/>
20 #
21 from Bio import AlignIO
22 import argparse
23 import sys
24 import os
25
def build_parser():
    """Build the command-line parser for the script.

    The options, destinations, defaults and help strings are the script's
    public command-line interface and must stay stable for existing callers.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", "--in", dest="inFile", required=True, type=str,
                        help="Input alignment")

    parser.add_argument("-o", "--out", dest="outFile", default=None, type=str,
                        help="Set output file. It will be generated into "
                             "FASTA format")

    parser.add_argument("-m", "--min", dest="minLen", default=1, type=int,
                        help="Set a minimum sequence length to keep it in "
                             "the output alignment")

    parser.add_argument("-f", "--format", dest="inFormat", default="fasta",
                        type=str,
                        choices=["clustal", "fasta-m10", "fasta",
                                 "phylip-relaxed", "phylip-sequential",
                                 "phylip", "nexus"],
                        help="Set input alignment format")

    parser.add_argument("-g", "--gap_symbol", dest="gapSymbol", default='-',
                        type=str,
                        help="Define the gap symbol used in the input "
                             "alignment")

    parser.add_argument("--keep_header", dest="keepHeader", default=False,
                        action="store_true",
                        help="Keep original alignment sequence IDs "
                             "independently of blank spaces on it")

    parser.add_argument("-v", "--verbose", dest="verbose", default=False,
                        action="store_true", help="Activate verbosity")

    return parser


def count_residues(sequence, gap_symbol="-"):
    """Return how many characters of *sequence* differ from *gap_symbol*.

    The comparison is made character by character, so a gap symbol longer
    than one character never matches and every position is counted.
    """
    return sum(1 for symbol in sequence if symbol != gap_symbol)


def main(argv=None):
    """Write the sequences having at least ``--min`` non-gap characters.

    Kept sequences are written as ``>id`` followed by the sequence on one
    line, gaps included, to the ``--out`` file or to standard output.
    With ``--verbose``, each removed sequence is reported on standard error.

    argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``.

    Exits through ``sys.exit`` with an error message when the input path is
    not an existing file.
    """
    args = build_parser().parse_args(argv)

    if not os.path.isfile(args.inFile):
        sys.exit("ERROR: Check input alignment file '%s'" % (args.inFile))

    # Parse the input before opening the output: opening with "w" truncates,
    # so a malformed input must not clobber an existing output file.
    alignment = AlignIO.read(args.inFile, format=args.inFormat)

    ofile = open(args.outFile, "w") if args.outFile else sys.stdout
    try:
        for record in alignment:
            sequence_id = record.description if args.keepHeader else record.id
            sequence = str(record.seq)
            valid = count_residues(sequence, args.gapSymbol)

            if valid >= args.minLen:
                # write() instead of the 'print >>' statement so the script
                # runs under both Python 2 and Python 3.
                ofile.write(">%s\n%s\n" % (sequence_id, sequence))
            elif args.verbose:
                msg = ("INFO: Sequence '%s' has been removed. Shorter "
                       % (sequence_id))
                msg += ("(%d) than min. sequence length (%d)"
                        % (valid, args.minLen))
                sys.stderr.write(msg + "\n")
                sys.stderr.flush()
    finally:
        # Only close what this script opened; never close the process stdout.
        if ofile is not sys.stdout:
            ofile.close()


if __name__ == "__main__":
    main()