Mercurial > repos > public-health-bioinformatics > amino2consensus
diff amino2consensus.py @ 0:1e0389317446 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
author | public-health-bioinformatics |
---|---|
date | Mon, 06 Jan 2020 19:11:48 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/amino2consensus.py Mon Jan 06 19:11:48 2020 -0500 @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import argparse +import csv + + +AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*'] + + +def determine_amino(amino_counts, threshold): + amino = "" + total_count = sum(amino_counts.values()) + amino_with_max_counts = sorted(amino_counts, key=amino_counts.get, reverse=True)[0] + if total_count == 0: + amino = "#" + elif (amino_counts[amino_with_max_counts] / float(total_count)) > threshold: + amino = amino_with_max_counts + else: + amino = "@" + return amino + + +def determine_first_region(amino_file): + with open(amino_file) as f: + reader = csv.DictReader(f) + row = next(reader) + region = row['region'] + return region + + +def main(args): + current_region = determine_first_region(args.amino) + seq = [] + with open(args.amino) as f: + reader = csv.DictReader(f) + for row in reader: + if row['region'] == current_region: + amino_counts = {} + for amino_acid in AMINO_ACIDS: + amino_counts[amino_acid] = int(row[amino_acid]) + amino = determine_amino(amino_counts, args.threshold) + seq.append(amino) + else: + print(">" + current_region) + print(''.join(seq)) + current_region = row['region'] + seq = [] + amino_counts = {} + for amino_acid in AMINO_ACIDS: + amino_counts[amino_acid] = int(row[amino_acid]) + amino = determine_amino(amino_counts, args.threshold) + seq.append(amino) + print(">" + current_region) + print(''.join(seq)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("amino", help="MiCall amino.csv output file") + parser.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling") + args = parser.parse_args() + main(args)