annotate prepare_amplicon_info.py @ 5:49236b03e4fd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
author iuc
date Wed, 19 May 2021 16:49:13 +0000
parents
children 584beffa972b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
49236b03e4fd "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
iuc
parents:
diff changeset
#!/usr/bin/env python

# extends ivar trim's amplicon info parsing abilities
# to include calculation of amplicon regions from
# sets of nested (more than two) primers
#
# usage: prepare_amplicon_info.py <primers.bed> <amplicon_info> <output>

import sys


BED_FORMAT_ERROR = (
    'Primer BED file needs to be TAB-separated with the '
    'following columns: '
    'chrom, chromStart, chromEnd, name, score, strand, '
    'where "chromStart", "chromEnd" need to be integer values '
    'and "strand" needs to be either "+" or "-".'
)


def parse_primer_starts(bed_lines):
    """Map each primer name to its start position.

    bed_lines is any iterable of text lines (e.g. an open file) in
    six-column, TAB-separated BED layout. For a "+" strand primer the
    recorded position is chromStart; for a "-" strand primer it is
    chromEnd - 1. Blank lines are skipped. If a name occurs more than
    once, the last occurrence wins.

    Exits the program with an explanatory message if a non-blank line
    has fewer than six columns, a non-integer coordinate, or a strand
    other than "+" or "-".
    """
    primer_starts = {}
    for line in bed_lines:
        line = line.strip()
        if not line:
            # tolerate empty / trailing blank lines instead of
            # reporting them as malformed records
            continue
        f = line.split('\t')
        try:
            if f[5] == '+':
                primer_starts[f[3]] = int(f[1])
            elif f[5] == '-':
                primer_starts[f[3]] = int(f[2]) - 1
            else:
                raise ValueError()
        except (IndexError, ValueError):
            sys.exit(BED_FORMAT_ERROR)
    return primer_starts


def outer_primer_lines(info_lines, primer_starts):
    """Reduce each amplicon info line to its two outermost primers.

    info_lines is any iterable of text lines, each holding the
    TAB-separated names of all primers of one amplicon.
    primer_starts is the name -> position mapping produced by
    parse_primer_starts().

    Returns a list of "first<TAB>last<NEWLINE>" strings, where first is
    the primer with the smallest and last the primer with the largest
    start position on that line (on ties the earliest listed primer is
    kept). Blank lines are skipped.

    Exits the program with an explanatory message if a primer name is
    missing from primer_starts or if a line does not yield two distinct
    outer primers.
    """
    ret_lines = []
    for line in info_lines:
        line = line.strip()
        if not line:
            # an empty line would otherwise be looked up as the primer
            # name "" and abort the whole run
            continue
        first = last = None
        for pname in line.split('\t'):
            try:
                primer_start = primer_starts[pname]
            except KeyError:
                sys.exit(
                    'Amplicon info with primer name not found in '
                    f'primer BED file: "{pname}"'
                )
            if first is None or primer_start < primer_starts[first]:
                first = pname
            if last is None or primer_start > primer_starts[last]:
                last = pname
        if first == last:
            # report the stripped line in quotes so the message stays
            # on one line and the offending input is clearly delimited
            sys.exit(f'"{line}" is not a proper amplicon info line.')
        ret_lines.append(f'{first}\t{last}\n')
    return ret_lines


def main(argv):
    """Run the conversion.

    argv[1]: primer BED file, argv[2]: amplicon info file,
    argv[3]: path of the amended amplicon info file to write.
    """
    if len(argv) < 4:
        sys.exit(
            f'Usage: {argv[0] if argv else "prepare_amplicon_info.py"} '
            '<primers.bed> <amplicon_info> <output>'
        )

    # parse primers and their start positions from BED file
    with open(argv[1]) as i:
        primer_starts = parse_primer_starts(i)

    # parse amplicon info and record outer primer names
    with open(argv[2]) as i:
        ret_lines = outer_primer_lines(i, primer_starts)

    # write amended amplicon info
    with open(argv[3], 'w') as o:
        o.writelines(ret_lines)


if __name__ == '__main__':
    main(sys.argv)