Mercurial > repos > public-health-bioinformatics > amino2consensus
annotate amino2consensus.py @ 0:1e0389317446 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
author | public-health-bioinformatics |
---|---|
date | Mon, 06 Jan 2020 19:11:48 -0500 |
parents | |
children |
rev | line source |
---|---|
0
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
2 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
3 from __future__ import print_function |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
4 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
5 import argparse |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
6 import csv |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
7 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
8 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
# Symbols whose per-position counts are read from each input row: main() looks
# up a column named after every entry and converts its value to int.
# NOTE(review): '*' presumably denotes a stop codon -- confirm against the
# MiCall amino.csv format.
AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*']
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
10 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
11 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def determine_amino(amino_counts, threshold):
    """Call a consensus symbol for one position from its symbol counts.

    Args:
        amino_counts: Mapping of symbol -> observed count at this position.
        threshold: Fraction of the total count that the most frequent symbol
            must strictly exceed in order to be called.

    Returns:
        "#" when the counts sum to zero (including an empty mapping),
        the most frequent symbol when its share of the total is strictly
        greater than ``threshold``, and "@" otherwise.
    """
    total_count = sum(amino_counts.values())
    # Handle "no data" before looking for the top symbol: selecting a maximum
    # from an empty mapping would raise instead of reporting "#".
    if total_count == 0:
        return "#"
    # max() yields the first symbol (in mapping iteration order) holding the
    # highest count, which is the same tie-break a stable descending sort gives.
    amino_with_max_counts = max(amino_counts, key=amino_counts.get)
    # float() keeps true division on Python 2 as well as Python 3.
    if amino_counts[amino_with_max_counts] / float(total_count) > threshold:
        return amino_with_max_counts
    return "@"
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
23 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
24 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def determine_first_region(amino_file):
    """Return the 'region' field of the first data row of a CSV file.

    The file at ``amino_file`` is parsed with ``csv.DictReader``, so its
    first line is treated as the header. Only the first row after the
    header is consumed.
    """
    with open(amino_file) as handle:
        first_row = next(csv.DictReader(handle))
        return first_row['region']
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
31 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
32 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def main(args):
    """Print one FASTA-style record per run of rows sharing a region.

    Reads the CSV file named by ``args.amino``. For each row, the counts in
    the columns listed in ``AMINO_ACIDS`` are converted to int and passed to
    ``determine_amino`` together with ``args.threshold``; the returned symbol
    is appended to the sequence of the current region. Whenever the
    ``region`` column changes, and again at end of input, a header line
    (">" followed by the region) and the joined sequence are printed.

    Args:
        args: Object exposing ``amino`` (path of the CSV file) and
            ``threshold`` (passed through to ``determine_amino``).

    An input with no data rows produces no output.
    """
    current_region = None
    seq = []
    with open(args.amino) as f:
        for row in csv.DictReader(f):
            if row['region'] != current_region:
                # seq is non-empty exactly when a region has been started,
                # so nothing is flushed ahead of the first row.
                if seq:
                    print(">" + current_region)
                    print(''.join(seq))
                current_region = row['region']
                seq = []
            amino_counts = {
                amino_acid: int(row[amino_acid]) for amino_acid in AMINO_ACIDS
            }
            seq.append(determine_amino(amino_counts, args.threshold))
    # Flush the final region; skipped entirely when the file had no data rows.
    if seq:
        print(">" + current_region)
        print(''.join(seq))
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
57 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
58 |
1e0389317446
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
# Script entry point: parse the command line and hand the result to main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Positional input path.
    parser.add_argument(
        "amino",
        help="MiCall amino.csv output file",
    )
    # Optional calling threshold, parsed as a float.
    parser.add_argument(
        "--threshold",
        default=0.15,
        type=float,
        help="Threshold for calling",
    )
    args = parser.parse_args()
    main(args)