comparison annot2krona.py @ 17:d14b68e9fd1d draft

Uploaded - new tools added
author petr-novak
date Wed, 28 Apr 2021 08:37:20 +0000
parents
children
comparison
equal deleted inserted replaced
16:5376e1c9adec 17:d14b68e9fd1d
1 #!/usr/bin/env python3
2 '''
3 take various inputs and convert them to krona tabular format for visualization
4 supported inputs:
5 - DANTE gff3
6 - TODO PROFREP gff3
7 - TODO RE archive - normal run
8 - TODO RE archive - comparative
9 -
10 '''
11 import argparse
12 import re
13 import collections
14
15
def parse_dante_gff(f):
    '''Load a DANTE GFF3 file and count records per classification.

    f is any iterable of text lines (e.g. an open file handle).

    Returns a collections.defaultdict(int) mapping the value of each record's
    "Final_Classification" attribute to the number of records carrying it.

    Comment lines (starting with "#"), blank lines, records with fewer than
    nine tab-separated columns and records without a Final_Classification
    attribute are skipped.
    '''
    prefix = "Final_Classification="
    cls_count = collections.defaultdict(int)
    for line in f:
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        # Drop only the line terminator: the attribute column is the last one,
        # so a trailing newline would otherwise end up inside the
        # classification key. Leading whitespace is kept so that an empty
        # first column does not shift the remaining columns.
        columns = line.rstrip("\r\n").split("\t")
        if len(columns) < 9:
            continue
        for attribute in columns[8].split(";"):
            if attribute.startswith(prefix):
                # Only the first matching attribute of a record is counted.
                cls_count[attribute[len(prefix):]] += 1
                break

    return cls_count
29
30
def export_classification(cls, f):
    '''Write classification counts to f as tab-delimited rows.

    cls maps a classification string to its count; f is a writable text
    handle. Each row holds the count followed by the classification, with
    every "|" in the classification turned into a tab.
    '''
    for label, count in cls.items():
        levels = "\t".join(label.split("|"))
        f.write('{}\t{}\n'.format(count, levels))
35
36
37
if __name__ == "__main__":
    # Command-line entry point: read the input in the chosen format and write
    # the classification counts as a tab-delimited table.
    parser = argparse.ArgumentParser()
    # All three options are required: without any of them the script cannot
    # produce output and would otherwise fail later with an unrelated error.
    parser.add_argument('-f', '--format', choices=['dante', 'profrep', 're'],
                        required=True)
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        required=True)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        required=True)

    args = parser.parse_args()

    # Handle the unimplemented formats before any parsing. The list needs the
    # comma: adjacent string literals are concatenated into a single element.
    if args.format in ["profrep", "re"]:
        print("Not implemented")
        raise SystemExit(0)

    # 'dante' is the only remaining value permitted by `choices`.
    classification = parse_dante_gff(args.input)

    export_classification(classification, args.output)