Mercurial > repos > iuc > ivar_trim
annotate write_amplicon_info_file.py @ 8:397e5f0eb3ef draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
| author | iuc | 
|---|---|
| date | Thu, 05 Aug 2021 12:46:37 +0000 | 
| parents | |
| children | c092052ed673 | 
| rev | line source | 
|---|---|
| 
8
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
2 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
3 import argparse | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
4 import re | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
5 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
# Primer names must contain "_<digits>_" (the amplicon number), optionally
# followed by non-digit characters, and then a side marker: L, LEFT, R or RIGHT.
# The pattern is applied with re.match, so it is anchored at the start of the
# name only; anything after the side marker is ignored.
AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)'


def primer_info_to_position(name):
    """Return an integer sort key for a primer name.

    The key is the amplicon number parsed from ``name``, increased by 1000
    when the primer's side marker is ``R`` or ``RIGHT``. For primers that
    share an amplicon number this places left primers before right primers.

    Args:
        name: primer name, e.g. ``nCoV-2019_1_LEFT``.

    Returns:
        The integer sort key.

    Raises:
        ValueError: if ``name`` does not match ``AMPLICON_NAME_RE``.
    """
    parsed = re.match(AMPLICON_NAME_RE, name)
    if parsed is None:
        raise ValueError("{} does not match expected amplicon name format".format(name))
    # 'num' is a mandatory group of the pattern, so on a successful match it
    # is always a string of digits and can be converted unconditionally.
    side_offset = 1000 if parsed.group('name') in ('RIGHT', 'R') else 0
    return side_offset + int(parsed.group('num'))
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
21 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
22 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
def write_amplicon_info_file(bed_file, amplicon_info_file):
    """Write an amplicon info file from a primer BED file.

    Primer names are read from the fourth tab-separated column of each data
    line in ``bed_file`` and grouped by the amplicon number embedded in the
    name (the ``num`` group of ``AMPLICON_NAME_RE``). One line is written per
    amplicon, in ascending amplicon-number order, holding that amplicon's
    primer names joined by tabs and ordered by ``primer_info_to_position``.

    Blank lines and lines starting with ``#`` are skipped.

    Args:
        bed_file: iterable of text lines (e.g. an open file) in BED format.
        amplicon_info_file: writable text file object; it is closed once all
            amplicons have been written.

    Raises:
        ValueError: if a primer name does not match ``AMPLICON_NAME_RE``.
        IndexError: if a data line has fewer than four tab-separated fields.
    """
    amplicon_sets = {}
    for line in bed_file:
        line = line.strip()
        # Blank lines and '#' comments carry no primer record; without this
        # guard they would fail on the column lookup below.
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        name = fields[3]
        re_match = re.match(AMPLICON_NAME_RE, name)
        if re_match is None:
            raise ValueError("{} does not match expected amplicon name format".format(name))
        amplicon_id = int(re_match.group('num'))
        amplicon_sets.setdefault(amplicon_id, []).append(name)

    # The dict keys are the amplicon numbers; iterate them in numeric order.
    for amplicon_id in sorted(amplicon_sets):
        primer_names = sorted(amplicon_sets[amplicon_id], key=primer_info_to_position)
        amplicon_info_file.write('\t'.join(primer_names) + '\n')
    amplicon_info_file.close()
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
42 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
43 | 
| 
 
397e5f0eb3ef
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 6dae6f97a45a61b1f10be4227d978584624c3b3d"
 
iuc 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # Command-line entry point: read a primer BED file and write the
    # corresponding iVar amplicon info file.
    parser = argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions')
    parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file')
    parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format')
    args = parser.parse_args()

    # argparse.FileType opens the input but never closes it; the output file
    # is closed by write_amplicon_info_file itself.
    with args.bed_file:
        write_amplicon_info_file(args.bed_file, args.amplicon_info_file)
