17
|
1 #!/usr/bin/env python3
|
|
2 '''
|
|
3 Take various inputs and convert them to Krona tabular format for visualization.
|
|
4 supported inputs:
|
|
5 - DANTE gff3
|
|
6 - TODO PROFREP gff3
|
|
7 - TODO RE archive - normal run
|
|
8 - TODO RE archive - comparative
|
|
9 -
|
|
10 '''
|
|
11 import argparse
|
|
12 import re
|
|
13 import collections
|
|
14
|
|
15
|
|
def parse_dante_gff(f):
    '''load gff3 file and return classification with counts

    f is an iterable of GFF3 text lines (for example an open text file).

    Returns a collections.defaultdict(int) mapping the value of the
    Final_Classification attribute (the text after "Final_Classification=")
    to the number of records that carry it.

    Comment lines (first non-blank character is "#") and blank lines are
    ignored. Records with fewer than nine tab-separated columns, or with no
    Final_Classification attribute, are skipped instead of raising
    IndexError.
    '''
    tag = "Final_Classification="
    cls_count = collections.defaultdict(int)
    for line in f:
        # Drop the line terminator first: otherwise a Final_Classification
        # that is the last attribute keeps the "\n" inside its value.
        record = line.rstrip("\r\n")
        stripped = record.strip()
        if not stripped or stripped.startswith("#"):
            continue
        columns = record.split("\t")
        if len(columns) < 9:
            # Not a complete GFF3 record; there is no attributes column.
            continue
        for attribute in columns[8].split(";"):
            if attribute.startswith(tag):
                # Only the first matching attribute of a record is counted.
                cls_count[attribute[len(tag):]] += 1
                break

    return cls_count
|
|
29
|
|
30
|
|
def export_classification(cls, f):
    '''write the classification counts as tab delimited rows

    cls maps a "|"-separated classification string to its count; f is a
    writable text stream. Each row holds the count followed by the
    classification levels, one level per column.
    '''
    for lineage, count in cls.items():
        # Splitting on "|" and joining with tabs puts each level in its
        # own column after the count.
        columns = [str(count)] + lineage.split("|")
        f.write("\t".join(columns) + "\n")
|
|
35
|
|
36
|
|
37
|
|
if __name__ == "__main__":
    # Command-line entry point: read the input in the chosen format and
    # write the classification counts as a krona tabular file.
    parser = argparse.ArgumentParser()
    # All three options are needed to do any work, so let argparse report a
    # usage error instead of failing later with NameError/TypeError.
    parser.add_argument('-f', '--format', choices=['dante', 'profrep', 're'],
                        required=True)
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        required=True)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        required=True)

    args = parser.parse_args()

    # argparse has already opened both files; the with-block guarantees they
    # are closed (and the output flushed) on every exit path.
    with args.input, args.output:
        # The original list literal lacked a comma ("profrep" 're' is the
        # single string "profrepre"), so this branch could never be taken.
        if args.format in ("profrep", "re"):
            # Report on stderr with a non-zero status so callers do not
            # mistake an unimplemented format for a successful conversion.
            parser.exit(status=1, message="Not implemented\n")

        classification = parse_dante_gff(args.input)
        export_classification(classification, args.output)
|