annotate write_amplicon_info_file.py @ 8:2d9926ce62be draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
author iuc
date Thu, 05 Aug 2021 12:45:47 +0000
parents
children 731182d54f78
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
2
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
3 import argparse
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
4 import re
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
5
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
# Pattern for the primer names found in the BED name column.  It captures the
# amplicon number (group ``num``: the digits between two underscores) and the
# primer side (group ``name``: one of L, LEFT, R, RIGHT).  It is applied with
# re.match, so it is anchored at the start of the name only and anything that
# follows the side marker is ignored.
# NOTE(review): ``[^0-9]*`` is greedy, so ``name`` binds to the *last*
# upper-case L or R in the digit-free run after the number (for example
# 'x_1_RIGHT_ALT' yields 'L') -- confirm whether such names must be supported.
AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'


def primer_info_to_position(name):
    """Return an integer sort key for a primer name.

    The key is the amplicon number parsed from ``name``, plus 1000 when the
    primer side is ``R`` or ``RIGHT``.  Primers of one amplicon share the same
    number, so sorting them by this key places left primers before right ones.

    Args:
        name: primer name, e.g. ``nCoV-2019_1_LEFT``.

    Returns:
        The integer sort key described above.

    Raises:
        ValueError: if ``name`` does not match ``AMPLICON_NAME_RE``.
    """
    re_match = re.match(AMPLICON_NAME_RE, name)
    if re_match is None:
        raise ValueError("{} does not match expected amplicon name format".format(name))
    # ``num`` is a mandatory group of the pattern, so it is always set here.
    position = int(re_match.group('num'))
    if re_match.group('name') in ('R', 'RIGHT'):
        position += 1000
    return position
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
21
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
22
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
def write_amplicon_info_file(bed_file, amplicon_info_file):
    """Group the primer names of a BED file by amplicon and write them out.

    Each non-blank input line is split on tabs and its fourth field is taken
    as the primer name.  Names are grouped by the amplicon number parsed from
    them.  One output line is written per amplicon, in ascending order of
    amplicon number, holding the tab-separated primer names of that amplicon
    ordered by ``primer_info_to_position``.

    Args:
        bed_file: iterable of text lines (e.g. an open file) in tab-separated
            BED layout with the primer name in the fourth column.
        amplicon_info_file: writable text file object; it is closed by this
            function once all amplicons have been written.

    Raises:
        ValueError: if a primer name does not match ``AMPLICON_NAME_RE``.
        IndexError: if a non-blank line has fewer than four tab-separated
            fields.
    """
    amplicon_sets = {}
    for line in bed_file:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (such as a trailing empty line) instead of
            # failing on the missing name column.
            continue
        name = stripped.split('\t')[3]
        re_match = re.match(AMPLICON_NAME_RE, name)
        if re_match is None:
            raise ValueError("{} does not match expected amplicon name format".format(name))
        amplicon_id = int(re_match.group('num'))
        amplicon_sets.setdefault(amplicon_id, []).append(name)

    # Iterating the sorted dict keys yields the amplicon numbers in ascending order.
    for amplicon_id in sorted(amplicon_sets):
        primer_names = sorted(amplicon_sets[amplicon_id], key=primer_info_to_position)
        amplicon_info_file.write('\t'.join(primer_names) + '\n')
    amplicon_info_file.close()
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
42
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
43
2d9926ce62be "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: read a primer BED file and write the
    # corresponding amplicon info file.
    parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions')
    parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file')
    parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format')
    args = parser.parse_args()

    # Close the input handle opened by argparse once processing is finished;
    # the output handle is closed by write_amplicon_info_file itself.
    with args.bed_file as bed_file:
        write_amplicon_info_file(bed_file, args.amplicon_info_file)