Mercurial > repos > iss > eurl_vtec_wgs_pt
comparison scripts/ReMatCh/utils/convert_Ns_to_gaps.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6bab5103a14 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 # -*- coding: utf-8 -*- | |
4 | |
5 """ | |
6 convert_Ns_to_gaps.py - Convert the Ns into gaps | |
7 <https://github.com/B-UMMI/ReMatCh/> | |
8 | |
9 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt> | |
10 | |
11 Last modified: October 15, 2018 | |
12 | |
13 This program is free software: you can redistribute it and/or modify | |
14 it under the terms of the GNU General Public License as published by | |
15 the Free Software Foundation, either version 3 of the License, or | |
16 (at your option) any later version. | |
17 | |
18 This program is distributed in the hope that it will be useful, | |
19 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 GNU General Public License for more details. | |
22 | |
23 You should have received a copy of the GNU General Public License | |
24 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
25 """ | |
26 | |
27 import os | |
28 import argparse | |
29 | |
30 | |
# Script version; reported on the command line by the --version option.
version = '0.2'
32 | |
33 | |
def conversion(infile, outfile):
    """
    Copy a fasta file replacing every 'N' found in the sequence lines by a gap ('-').

    Header lines (the ones starting with '>') are written unchanged, empty lines
    are dropped and every written line is terminated with '\\n'. A progress
    message is printed to stdout every 10 sequences, followed by a final message
    with the total number of sequences when that total was not just reported.

    Parameters
    ----------
    infile : str
        Path to the input fasta file
    outfile : str
        Path to the output fasta file (it is overwritten if it already exists)

    Returns
    -------
    None
    """
    report_every = 10
    processed = 0
    # None (instead of 0) so that an input without sequences still gets its "0 sequences" message
    last_printed = None

    # Text mode already applies universal newlines in Python 3. The former 'U' flag was removed in
    # Python 3.11, where open() raises ValueError when it is given.
    with open(infile, 'rt') as reader, open(outfile, 'wt') as writer:
        for line in reader:
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                writer.write(line + '\n')
                processed += 1
                if processed % report_every == 0:
                    print('\n' + str(processed) + ' sequences already processed')
                    last_printed = processed
            elif len(line) > 0:
                # Only sequence lines are converted; Ns inside headers are kept
                writer.write(line.replace('N', '-') + '\n')

    # Avoid repeating the total when the last sequence already triggered the periodic message
    if last_printed != processed:
        print('\n' + str(processed) + ' sequences already processed')
53 | |
54 | |
def convert_n_2_gaps(args):
    """
    Resolve the input and output paths from the parsed arguments and run the conversion.

    The output directory is created when it does not exist yet.

    Parameters
    ----------
    args : argparse.Namespace
        Object with two attributes: `infile`, either an open file object (as the one
        created by argparse.FileType) or a path string, and `outfile`, the path to
        the output file

    Raises
    ------
    ValueError
        If the input and the output are the same file: opening the output for
        writing would truncate the input before it is read

    Returns
    -------
    None
    """
    import sys

    infile_arg = args.infile
    infile = os.path.abspath(getattr(infile_arg, 'name', infile_arg))
    outfile = os.path.abspath(args.outfile)

    # Only the path is used (conversion() opens the file by itself), so release the handle that
    # argparse.FileType left open. The interpreter's own stdin is never closed here.
    close_handle = getattr(infile_arg, 'close', None)
    if callable(close_handle) and infile_arg is not sys.stdin:
        close_handle()

    same_file = infile == outfile or (os.path.exists(infile) and os.path.exists(outfile) and
                                      os.path.samefile(infile, outfile))
    if same_file:
        raise ValueError('The input and output files are the same: ' + infile)

    # exist_ok avoids the check-then-create race of a separate os.path.isdir() test
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    conversion(infile, outfile)
65 | |
66 | |
def main():
    """
    Command-line entry point: parse the options and convert the Ns of a fasta file into gaps.

    The input file is mandatory. The output file is optional and falls back to
    'converted_Ns_to_gaps.fasta' (relative to the current working directory).
    """
    parser = argparse.ArgumentParser(prog='convert_Ns_to_gaps.py', description='Convert the Ns into gaps',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version', version='%(prog)s v' + version)

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-i', '--infile', type=argparse.FileType('r'), metavar='/path/to/input/file.fasta',
                                 help='Path to the fasta file', required=True)

    # A required option can never fall back to its default, so the default shown in the help by
    # ArgumentDefaultsHelpFormatter was misleading. The option is optional now; passing -o works as before.
    parser_optional_general = parser.add_argument_group('General facultative options')
    parser_optional_general.add_argument('-o', '--outfile', type=str, metavar='/path/to/converted/output/file.fasta',
                                         help='Converted output fasta file', required=False,
                                         default='converted_Ns_to_gaps.fasta')

    args = parser.parse_args()

    convert_n_2_gaps(args)


if __name__ == "__main__":
    main()