Mercurial > repos > iss > eurl_vtec_wgs_pt
diff scripts/ReMatCh/utils/combine_alignment_consensus.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
combine_alignment_consensus.py - Combine the alignment consensus
sequences from ReMatCh first run by reference sequences into single
files
<https://github.com/B-UMMI/ReMatCh/>

Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt>

Last modified: October 15, 2018

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import os
import argparse
import time
import sys

version = '0.2'

# Progress is reported every time this many input files have been processed.
_PROGRESS_STEP = 100


def _report_progress(done, total):
    """Print the percentage of input files processed so far."""
    print('\n' + str(round((float(done) / total) * 100, 2)) + '% of IDs already processed')


def _append_sample_sequences(input_file, sample, outdir):
    """
    Split one sample's multi-FASTA file into the per-reference output files.

    Every record of ``input_file`` is appended to ``<outdir>/<header>.fasta``
    (``<header>`` being the record's header line without the leading ``>``),
    with the record header replaced by ``>sample``.

    Parameters
    ----------
    input_file : str
        Path to the sample's alignment consensus FASTA file.
    sample : str
        Sample name written as the header of every appended record.
    outdir : str
        Directory where the per-reference FASTA files are written.
    """
    writer = None
    try:
        # Plain text mode already applies universal newlines; the former 'U'
        # flag was removed in Python 3.11 and raises ValueError there.
        with open(input_file, 'rt') as reader:
            for line in reader:
                line = line.rstrip('\r\n')
                if line.startswith('>'):
                    if writer is not None:
                        writer.close()
                    file_output = os.path.join(outdir, line[1:] + '.fasta')
                    # Append mode creates the file when it does not exist yet,
                    # so no prior existence check is required.
                    writer = open(file_output, 'at')
                    writer.write('>' + sample + '\n')
                elif line:
                    if writer is None:
                        # Sequence data before any header belongs to no
                        # reference: report it instead of crashing.
                        sys.stderr.write('WARNING: ignoring sequence line found before any header in '
                                         + input_file + '\n')
                        continue
                    writer.write(line + '\n')
    finally:
        # Runs on errors too, so the last output handle is never leaked.
        if writer is not None:
            writer.close()


def concatenate_files(input_files_list, outdir):
    """
    Combine per-sample consensus files into one FASTA file per reference.

    The sample name is taken from each input file name, dropping its last two
    dot-separated extensions (e.g. ``sample.alignment.fasta`` -> ``sample``).
    A progress message is printed every 100 files and once more at the end
    when the last file was not already covered by such a message.

    Parameters
    ----------
    input_files_list : list of str
        Paths to the alignment consensus FASTA files, one per sample.
    outdir : str
        Existing directory where the combined files are written.
    """
    total = len(input_files_list)
    last_reported = 0
    for processed, input_file in enumerate(input_files_list, start=1):
        sample = os.path.basename(input_file).rsplit('.', 2)[0]
        _append_sample_sequences(input_file, sample, outdir)

        if processed % _PROGRESS_STEP == 0:
            _report_progress(processed, total)
            last_reported = processed

    # Always finish with a 100% message, without repeating it and without
    # dividing by zero when the list is empty.
    if total > 0 and last_reported != total:
        _report_progress(total, total)


def _find_alignment_files(workdir):
    """
    Return the ``*.alignment.fasta`` files found one directory below workdir.

    Hidden entries (names starting with ``.``) are skipped at both levels and
    the result is sorted so the output does not depend on the file system
    listing order.
    """
    alignment_files = []
    for sample_dir in sorted(os.listdir(workdir)):
        sample_dir_path = os.path.join(workdir, sample_dir)
        if sample_dir.startswith('.') or not os.path.isdir(sample_dir_path):
            continue
        for file_found in sorted(os.listdir(sample_dir_path)):
            if file_found.startswith('.') or not file_found.endswith('.alignment.fasta'):
                continue
            file_found_path = os.path.join(sample_dir_path, file_found)
            if os.path.isfile(file_found_path):
                alignment_files.append(file_found_path)
    return alignment_files


def combine_alignment_consensus(args):
    """
    Locate the ReMatCh consensus files and combine them by reference.

    The combined files are written to a new
    ``combine_alignment_consensus_<YYYYmmdd-HHMMSS>`` directory created inside
    ``args.outdir``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``workdir`` (directory holding one sub-directory per
        sample) and ``outdir`` (parent directory for the results).

    Raises
    ------
    SystemExit
        When no ``*.alignment.fasta`` file is found under ``args.workdir``.
    """
    workdir = os.path.abspath(args.workdir)

    # Look for the input first so that a failed run leaves no empty output
    # directory behind.
    alignment_files = _find_alignment_files(workdir)
    if not alignment_files:
        sys.exit('No ReMatCh alignment.fasta files were found!')

    outdir = os.path.abspath(args.outdir)
    os.makedirs(outdir, exist_ok=True)

    outdir = os.path.join(outdir, 'combine_alignment_consensus_' + time.strftime("%Y%m%d-%H%M%S"), '')
    os.makedirs(outdir)

    concatenate_files(alignment_files, outdir)


def main():
    """Parse the command line options and run the combination."""
    parser = argparse.ArgumentParser(prog='combine_alignment_consensus.py',
                                     description='Combine the alignment consensus sequences from ReMatCh first run by'
                                                 ' reference sequences into single'
                                                 ' files', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version', version=str('%(prog)s v' + version))

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-w', '--workdir', type=str, metavar='/path/to/rematch/working/directory/',
                                 help='Path to the directory where ReMatCh was running', required=True)

    parser_optional_general = parser.add_argument_group('General facultative options')
    parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/path/to/output/directory/',
                                         help='Path to the directory where the combined sequence files will stored',
                                         required=False, default='.')

    args = parser.parse_args()

    combine_alignment_consensus(args)


if __name__ == "__main__":
    main()