annotate write_amplicon_info_file.py @ 17:abbc8041e8ec draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 32fbe8a14173afe0b39f1483afaba958dc6cd027
author iuc
date Fri, 21 Jun 2024 15:20:59 +0000
parents 584beffa972b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
2
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
3 import argparse
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
4 import re
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
5
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
6
9
3888bbe7a9ca "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit f09d0bee3e957564beccb1bdb3610de02f639ec7"
iuc
parents: 8
diff changeset
# A primer name must contain an underscore followed by digits (captured as
# ``num``, the amplicon number) and, later on, an underscore followed by
# L / LEFT / R / RIGHT (captured as ``name``). The pattern is applied with
# ``match`` so it is anchored at the start of the name but not at its end.
AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)')


def write_amplicon_info_file(bed_file, amplicon_info_file):
    """Group the primers of a BED file by amplicon and write them out.

    Every non-blank line of ``bed_file`` is split on tabs; column 2 is read
    as the integer primer start and column 4 as the primer name. The
    amplicon number is extracted from the name with ``AMPLICON_PAT``.

    One line is written per amplicon, in ascending amplicon number, holding
    the tab-separated names of its primers ordered by start position.

    Args:
        bed_file: iterable of text lines (e.g. an open file).
        amplicon_info_file: writable text handle; it is closed before the
            function returns successfully.

    Raises:
        ValueError: if a line has fewer than 4 tab-separated columns, if the
            start column is not an integer, or if a primer name does not
            match ``AMPLICON_PAT``.
    """
    amplicon_sets = {}
    for line_no, line in enumerate(bed_file, start=1):
        line = line.strip()
        if not line:
            continue
        fields = line.split('\t')
        # Fail with a message that points at the offending line instead of
        # an anonymous IndexError from the column lookups below.
        if len(fields) < 4:
            raise ValueError(
                'Line {}: expected at least 4 tab-separated BED columns, '
                'found {}'.format(line_no, len(fields))
            )
        start = int(fields[1])
        name = fields[3]
        re_match = AMPLICON_PAT.match(name)
        if re_match is None:
            raise ValueError(
                '{} does not match expected amplicon name format'.format(name)
            )
        amplicon_id = int(re_match.group('num'))
        amplicon_sets.setdefault(amplicon_id, []).append((name, start))

    # write amplicons sorted by number with primers sorted by start position
    for amplicon_id in sorted(amplicon_sets):
        primers = sorted(amplicon_sets[amplicon_id], key=lambda x: x[1])
        amplicon_info_file.write(
            '\t'.join(primer_name for primer_name, _ in primers) + '\n'
        )
    amplicon_info_file.close()
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
38
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
39
aea7008fe1f1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
iuc
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: argparse opens both positional arguments,
    # the BED file for reading and the amplicon info file for writing.
    cli = argparse.ArgumentParser(
        description=(
            'Write an amplicon info file for iVar '
            'from a BED file describing primer positions'
        ),
    )
    cli.add_argument(
        'bed_file',
        type=argparse.FileType(),
        help='Primer BED file',
    )
    cli.add_argument(
        'amplicon_info_file',
        type=argparse.FileType('w'),
        help='Output file: amplicon info file in TSV format',
    )
    options = cli.parse_args()

    write_amplicon_info_file(options.bed_file, options.amplicon_info_file)