Mercurial > repos > petr-novak > re_utils
comparison annot2krona.py @ 17:d14b68e9fd1d draft
Uploaded - new tools added
author | petr-novak |
---|---|
date | Wed, 28 Apr 2021 08:37:20 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
16:5376e1c9adec | 17:d14b68e9fd1d |
---|---|
1 #!/usr/bin/env python3 | |
2 ''' | |
3 take various inputs and convert them to krona tabular format for visualization | |
4 supported inputs: | |
5 - DANTE gff3 | |
6 - TODO PROFREP gff3 | |
7 - TODO RE archive - normal run | |
8 - TODO RE archive - comparative | |
9 - | |
10 ''' | |
11 import argparse | |
12 import re | |
13 import collections | |
14 | |
15 | |
def parse_dante_gff(f):
    """Count features per classification in a DANTE GFF3 file.

    Args:
        f: iterable of text lines (e.g. an open GFF3 file handle).

    Returns:
        collections.defaultdict(int) mapping the value of each feature's
        ``Final_Classification`` attribute to the number of features
        carrying it.

    Blank lines, comment/directive lines (starting with ``#``), lines with
    fewer than nine tab-separated columns, and features without a
    ``Final_Classification`` attribute are skipped.
    """
    prefix = "Final_Classification="
    cls_count = collections.defaultdict(int)
    for line in f:
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        # Drop the line terminator before splitting so that a classification
        # stored as the last attribute does not keep a trailing newline.
        columns = line.rstrip("\r\n").split("\t")
        if len(columns) < 9:
            continue
        for attribute in columns[8].split(";"):
            if attribute.startswith(prefix):
                # Only the first matching attribute is counted per feature.
                cls_count[attribute[len(prefix):]] += 1
                break

    return cls_count
29 | |
30 | |
def export_classification(cls, f):
    """Write classification counts as tab-delimited rows.

    Each row is the count followed by the classification string with its
    ``|`` separators turned into tabs, terminated by a newline.

    Args:
        cls: mapping of classification string to count.
        f: writable text file object.
    """
    for lineage, count in cls.items():
        levels = lineage.split("|")
        row = "\t".join([str(count)] + levels)
        f.write(row + "\n")
35 | |
36 | |
37 | |
if __name__ == "__main__":
    # Command-line entry point: read the input in the chosen format and write
    # the krona tabular output.
    parser = argparse.ArgumentParser()
    # All three options are required: without them the script previously
    # failed later with NameError/TypeError instead of a usage message.
    parser.add_argument('-f', '--format', choices=['dante', 'profrep', 're'],
                        required=True)
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        required=True)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        required=True)

    args = parser.parse_args()

    # The original list was written as ["profrep" 're'], which Python joins
    # into the single string "profrepre", so this branch was never taken.
    if args.format in ("profrep", "re"):
        print("Not implemented")
        # Exit status 0 is kept from the original script.
        raise SystemExit(0)

    # Only 'dante' remains once the unimplemented formats are handled.
    classification = parse_dante_gff(args.input)
    export_classification(classification, args.output)