annotate amino2consensus.py @ 3:fbc19b44ab66 draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit e58e1a02d64c2771fadf006bdf8f3661d069b6f3"
author public-health-bioinformatics
date Fri, 28 Feb 2020 13:03:20 -0500
parents 1e0389317446
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
1 #!/usr/bin/env python
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
2
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
3 from __future__ import print_function
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
4
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
5 import argparse
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
6 import csv
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
7
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
8
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
# One-letter codes of the count columns read from each amino.csv row:
# the 20 standard amino acids followed by '*' (stop codon).
AMINO_ACIDS = list('ACDEFGHIKLMNPQRSTVWY*')
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
10
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
11
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
def determine_amino(amino_counts, threshold):
    """Call the consensus symbol for a single position.

    Args:
        amino_counts: dict mapping a one-letter amino acid code to the
            number of times it was observed at this position.
        threshold: fraction of the total count that the most frequent
            amino acid must strictly exceed to be called.

    Returns:
        "#" when the counts sum to zero (including an empty dict),
        the most frequent amino acid when its share of the total is
        greater than ``threshold``, and "@" otherwise. Ties for the most
        frequent amino acid go to the first one in iteration order.
    """
    total_count = sum(amino_counts.values())
    # Check coverage before looking for a maximum so that an empty
    # mapping yields "#" instead of failing on an empty sequence.
    if total_count == 0:
        return "#"
    # max() returns the first maximal key, matching a stable descending sort.
    top_amino = max(amino_counts, key=amino_counts.get)
    # float() keeps true division under Python 2 as well.
    if amino_counts[top_amino] / float(total_count) > threshold:
        return top_amino
    return "@"
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
23
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
24
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
def determine_first_region(amino_file):
    """Return the 'region' value of the first data row of a CSV file.

    Args:
        amino_file: path to a CSV file with a header row that includes a
            'region' column.

    Raises:
        StopIteration: if the file has no data rows.
    """
    with open(amino_file) as handle:
        first_row = next(csv.DictReader(handle))
        return first_row['region']
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
31
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
32
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
def main(args):
    """Print one FASTA-style consensus record per region of an amino CSV.

    Rows are read in file order. Each run of consecutive rows sharing the
    same 'region' value becomes one record: a ">region" header line
    followed by one line holding the symbol called for every row of the
    run (see determine_amino).

    Args:
        args: parsed arguments providing ``amino`` (path to the CSV file)
            and ``threshold`` (calling threshold passed to
            determine_amino).

    A file with no data rows produces no output.
    """
    current_region = None
    seq = []
    started = False  # becomes True once the first data row has been seen

    with open(args.amino) as f:
        for row in csv.DictReader(f):
            region = row['region']
            if not started:
                current_region = region
                started = True
            elif region != current_region:
                # Region changed: emit the finished record and start a new one.
                print(">" + current_region)
                print(''.join(seq))
                current_region = region
                seq = []
            amino_counts = {aa: int(row[aa]) for aa in AMINO_ACIDS}
            seq.append(determine_amino(amino_counts, args.threshold))

    # Emit the last (still open) record, if any rows were read at all.
    if started:
        print(">" + current_region)
        print(''.join(seq))
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
57
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
58
1e0389317446 "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9c3ab5825c19a7c400a46f727975edb480a91c09"
public-health-bioinformatics
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the input path and the optional
    # calling threshold, then hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("amino", help="MiCall amino.csv output file")
    cli.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling")
    main(cli.parse_args())