Mercurial > repos > iss > eurl_vtec_wgs_pt
diff scripts/ReMatCh/utils/convert_Ns_to_gaps.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
convert_Ns_to_gaps.py - Convert the Ns into gaps
<https://github.com/B-UMMI/ReMatCh/>

Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt>

Last modified: October 15, 2018

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import os
import argparse


version = '0.2'


def conversion(infile, outfile):
    """
    Copy a fasta file replacing every 'N' found in sequence lines by '-'.

    Header lines (those starting with '>') are written unchanged, empty lines
    are dropped and every written line is terminated with a single '\\n'.
    Only uppercase 'N' is replaced. A progress message is printed to stdout
    every 10 sequences and once more at the end with the total.

    Parameters
    ----------
    infile : str
        Path to the fasta file to read
    outfile : str
        Path to the fasta file to write (overwritten if it already exists)

    Raises
    ------
    ValueError
        If infile and outfile resolve to the same file
    """

    # Opening the output for writing truncates it, which would wipe the input
    # before it is read when both paths point to the same file
    if os.path.realpath(infile) == os.path.realpath(outfile):
        raise ValueError('Input and output files must be different: ' + str(infile))

    processed = 0
    last_printed = 0
    # Plain text mode already translates '\r\n' and '\r' into '\n'. The legacy
    # 'U' flag must not be used: it was removed in Python 3.11
    with open(infile, 'rt') as reader, open(outfile, 'wt') as writer:
        for line in reader:
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                writer.write(line + '\n')
                processed += 1
                if processed % 10 == 0:
                    print('\n' + str(processed) + ' sequences already processed')
                    last_printed = processed
            elif line:
                writer.write(line.replace('N', '-') + '\n')

    # Final summary, skipped when the last progress message already reported
    # the total (number of sequences multiple of 10)
    if processed == 0 or last_printed < processed:
        print('\n' + str(processed) + ' sequences already processed')


def convert_n_2_gaps(args):
    """
    Run the conversion using the parsed command line arguments.

    The directory that will hold the output file is created when missing.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide 'infile' (an open file object, as produced by
        argparse.FileType, from which only the name is used) and 'outfile'
        (path to the output file)
    """

    outfile = os.path.abspath(args.outfile)
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    infile = os.path.abspath(args.infile.name)
    # Only the path is needed: release the handle opened by argparse instead
    # of leaking it, conversion() opens the file by itself
    args.infile.close()

    conversion(infile, outfile)


def main():
    """
    Parse the command line arguments and convert the input fasta file.
    """

    parser = argparse.ArgumentParser(prog='convert_Ns_to_gaps.py', description='Convert the Ns into gaps',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version', version='%(prog)s v' + version)

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-i', '--infile', type=argparse.FileType('r'), metavar='/path/to/input/file.fasta',
                                 help='Path to the fasta file', required=True)

    # The output option carries a default value, therefore it cannot be
    # required at the same time (the default would never be used)
    parser_optional = parser.add_argument_group('General options')
    parser_optional.add_argument('-o', '--outfile', type=str, metavar='/path/to/converted/output/file.fasta',
                                 help='Converted output fasta file', required=False,
                                 default='converted_Ns_to_gaps.fasta')

    args = parser.parse_args()

    convert_n_2_gaps(args)


if __name__ == "__main__":
    main()