Mercurial > repos > public-health-bioinformatics > micall_lite
view amino2consensus.py @ 1:bb549c5eaf34 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 5d661c102f12509d19c8869988c3f2f7ce00732b"
author | public-health-bioinformatics |
---|---|
date | Wed, 08 Jan 2020 17:43:18 -0500 |
parents | 023064145bea |
children |
line wrap: on
line source
#!/usr/bin/env python from __future__ import print_function import argparse import csv AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*'] def determine_amino(amino_counts, threshold): amino = "" total_count = sum(amino_counts.values()) amino_with_max_counts = sorted(amino_counts, key=amino_counts.get, reverse=True)[0] if total_count == 0: amino = "#" elif (amino_counts[amino_with_max_counts] / float(total_count)) > threshold: amino = amino_with_max_counts else: amino = "@" return amino def determine_first_region(amino_file): with open(amino_file) as f: reader = csv.DictReader(f) row = next(reader) region = row['region'] return region def main(args): current_region = determine_first_region(args.amino) seq = [] with open(args.amino) as f: reader = csv.DictReader(f) for row in reader: if row['region'] == current_region: amino_counts = {} for amino_acid in AMINO_ACIDS: amino_counts[amino_acid] = int(row[amino_acid]) amino = determine_amino(amino_counts, args.threshold) seq.append(amino) else: print(">" + current_region) print(''.join(seq)) current_region = row['region'] seq = [] amino_counts = {} for amino_acid in AMINO_ACIDS: amino_counts[amino_acid] = int(row[amino_acid]) amino = determine_amino(amino_counts, args.threshold) seq.append(amino) print(">" + current_region) print(''.join(seq)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("amino", help="MiCall amino.csv output file") parser.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling") args = parser.parse_args() main(args)