Mercurial > repos > public-health-bioinformatics > micall_lite
annotate amino2consensus.py @ 1:bb549c5eaf34 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 5d661c102f12509d19c8869988c3f2f7ce00732b"
author | public-health-bioinformatics |
---|---|
date | Wed, 08 Jan 2020 17:43:18 -0500 |
parents | 023064145bea |
children |
rev | line source |
---|---|
0
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
2 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
3 from __future__ import print_function |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
4 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
5 import argparse |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
6 import csv |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
7 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
8 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
# Column names of the per-residue count fields read from each CSV row:
# the twenty standard amino acids in one-letter code, followed by '*'.
AMINO_ACIDS = list('ACDEFGHIKLMNPQRSTVWY*')
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
10 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
11 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def determine_amino(amino_counts, threshold):
    """Call the consensus symbol for a single position.

    Args:
        amino_counts: Mapping of amino-acid symbol to its observed count.
        threshold: Fraction of the total count that the most frequent
            symbol must strictly exceed in order to be called.

    Returns:
        "#" when there is nothing to call from (the mapping is empty or the
        counts sum to zero); the most frequent symbol when its share of the
        total is strictly greater than ``threshold``; otherwise "@".
    """
    total_count = sum(amino_counts.values())
    # Guard first: an empty mapping has no maximum to look up, and a zero
    # total would otherwise lead to a division by zero below.
    if total_count == 0:
        return "#"
    # max() yields the first key holding the highest count, so ties are
    # resolved by the mapping's iteration order.
    amino_with_max_counts = max(amino_counts, key=amino_counts.get)
    if amino_counts[amino_with_max_counts] / float(total_count) > threshold:
        return amino_with_max_counts
    return "@"
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
23 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
24 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def determine_first_region(amino_file):
    """Return the 'region' value of the first data row of a CSV file.

    Args:
        amino_file: Path to a CSV file with a header row that includes a
            'region' column.

    Returns:
        The 'region' field of the first row after the header.

    Raises:
        StopIteration: If the file contains no data rows.
        KeyError: If the header has no 'region' column.
    """
    with open(amino_file) as handle:
        # Only the first record is needed; the rest of the file is not read.
        first_row = next(csv.DictReader(handle))
    return first_row['region']
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
31 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
32 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
def main(args):
    """Print one FASTA-style consensus record per region of the input CSV.

    The file named by ``args.amino`` is read once with ``csv.DictReader``.
    Consecutive rows sharing the same 'region' value form one record: a
    ">" + region header line followed by a sequence line made of one symbol
    per row, as chosen by ``determine_amino`` from that row's AMINO_ACIDS
    count columns. A region that reappears after a different region starts a
    new record.

    A file with a header but no data rows produces no output.

    Args:
        args: Object exposing ``amino`` (path to the CSV file) and
            ``threshold`` (passed through to ``determine_amino``).

    Raises:
        KeyError: If a row lacks the 'region' column or one of the
            AMINO_ACIDS columns.
        ValueError: If a count field cannot be parsed as an integer.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import groupby

    with open(args.amino) as f:
        reader = csv.DictReader(f)
        # groupby only merges adjacent rows, so records are emitted in file
        # order, one per run of rows with the same region.
        for region, rows in groupby(reader, key=lambda row: row['region']):
            seq = [
                determine_amino(
                    {amino_acid: int(row[amino_acid]) for amino_acid in AMINO_ACIDS},
                    args.threshold,
                )
                for row in rows
            ]
            # The whole region is processed before anything is printed, so a
            # malformed row never leaves a header without its sequence.
            print(">" + region)
            print(''.join(seq))
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
57 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
58 |
023064145bea
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: one positional CSV path and an optional
    # calling threshold, handed to main() as the parsed namespace.
    cli = argparse.ArgumentParser()
    cli.add_argument("amino", help="MiCall amino.csv output file")
    cli.add_argument(
        "--threshold",
        type=float,
        default=0.15,
        help="Threshold for calling",
    )
    main(cli.parse_args())