Mercurial > repos > padge > trimal
comparison trimal_repo/scripts/get_sequences_gaps_ratio.py @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b15a3147e604 |
---|---|
1 #!/usr/bin/python3 | |
2 | |
3 # | |
4 # 'get_sequneces_gaps.py' | |
5 # | |
6 # Script implemented to obtain the sequences index for those seuqneces | |
7 # exceding a minimum gaps' ratio threshold. | |
8 # | |
9 # [2020] S. Capella-Gutierrez - salvador.capella@bsc.es | |
10 # | |
11 # this script is free software: you can redistribute it and/or modify it under | |
12 # the terms of the GNU General Public License as published by the Free | |
13 # Software Foundation, the last available version. | |
14 # | |
15 # this script is distributed in the hope that it will be useful, but WITHOUT | |
16 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
18 # more details on <http://www.gnu.org/licenses/> | |
19 # | |
20 from Bio import AlignIO | |
21 import argparse | |
22 import sys | |
23 import os | |
24 | |
25 if __name__ == "__main__": | |
26 | |
27 parser = argparse.ArgumentParser() | |
28 | |
29 parser.add_argument("-i", "--in", dest = "inFile", required = True, type = \ | |
30 str, help = "Input alignment") | |
31 | |
32 parser.add_argument("-o", "--out", dest = "outFile", default = None, type = \ | |
33 str, help = "Set output file. It will be generated into FASTA format") | |
34 | |
35 parser.add_argument("-f", "--format", dest = "inFormat", default = "fasta", \ | |
36 type = str, choices = ["clustal", "fasta-m10", "fasta", "phylip-relaxed", \ | |
37 "phylip-sequential", "phylip", "nexus"],help = "Set input alignment format") | |
38 | |
39 parser.add_argument("-g", "--gap_symbol", dest = "gapSymbol", default = '-', \ | |
40 type = str, help = "Define the gap symbol used in the input alignment") | |
41 | |
42 parser.add_argument("--show_only_index", dest = "showIndexes", default = False, \ | |
43 action = "store_true", help = "Show only the indexes of sequences with a " | |
44 + "gaps' ratio equal or higher than the established threshold") | |
45 | |
46 parser.add_argument("--threshold", dest = "gapsThreshold", default = 0.0, \ | |
47 type = float, help = "Identify sequences with a minimum of gaps' ratio") | |
48 | |
49 parser.add_argument("--keep_header", dest = "keepHeader", default = False, | |
50 action = "store_true", help = "Keep original alignment sequence IDs indepen" | |
51 + "dently of blank spaces on it") | |
52 | |
53 args = parser.parse_args() | |
54 | |
55 if not os.path.isfile(args.inFile): | |
56 sys.exit(("ERROR: Check input alignment file '%s'") % (args.inFile)) | |
57 | |
58 index = 0 | |
59 indexes = [] | |
60 ofile = open(args.outFile, "w") if args.outFile else sys.stdout | |
61 for record in AlignIO.read(args.inFile, format = args.inFormat): | |
62 sequence_id = record.id if not args.keepHeader else record.description | |
63 sequence = str(record.seq) | |
64 | |
65 length = len(sequence) | |
66 valid = len([ps for ps in range(length) if sequence[ps] != args.gapSymbol]) | |
67 gaps_ratio = 1 - (valid/length) | |
68 | |
69 if gaps_ratio >= args.gapsThreshold: | |
70 if not args.showIndexes: | |
71 print(f'{index:4d}\t{sequence_id:30}\t{gaps_ratio:.4f}', file = ofile) | |
72 indexes.append(index) | |
73 index += 1 | |
74 | |
75 if args.showIndexes: | |
76 print (','.join(map(str, indexes)), file = ofile) | |
77 | |
78 ofile.close() |