annotate annot2krona.py @ 17:d14b68e9fd1d draft

Uploaded - new tools added
author petr-novak
date Wed, 28 Apr 2021 08:37:20 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
2 '''
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
3 Take various inputs and convert them to Krona tabular format for visualization.
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
4 supported inputs:
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
5 - DANTE gff3
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
6 - TODO PROFREP gff3
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
7 - TODO RE archive - normal run
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
8 - TODO RE archive - comparative
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
9 -
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
10 '''
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
11 import argparse
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
12 import re
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
13 import collections
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
14
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
15
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
def parse_dante_gff(f):
    '''Load a DANTE GFF3 file and count records per classification.

    f -- iterable of text lines (an open file handle or a list of strings).

    Returns a collections.defaultdict(int) mapping the value of the
    Final_Classification attribute to the number of records carrying it.

    Comment lines (starting with "#"), blank lines, records with fewer
    than nine tab-separated columns and records without a
    Final_Classification attribute are skipped.
    '''
    prefix = "Final_Classification="
    cls_count = collections.defaultdict(int)
    for line in f:
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        # Drop the line terminator first: the attributes column is the last
        # one, so the newline would otherwise end up inside the last
        # attribute value and therefore inside the classification key.
        fields = line.rstrip("\r\n").split("\t")
        if len(fields) < 9:
            continue
        for attribute in fields[8].split(";"):
            if attribute.startswith(prefix):
                cls_count[attribute[len(prefix):]] += 1
                # Only the first matching attribute of a record is counted.
                break

    return cls_count
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
29
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
30
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
def export_classification(cls, f):
    '''Write classification counts to f as a tab-delimited table.

    cls -- mapping of "|"-separated classification string to its count.
    f   -- writable text file object.

    One line is written per classification: the count first, followed by
    the classification levels, each "|" turned into a tab.
    '''
    for lineage, count in cls.items():
        levels = lineage.split("|")
        f.write('{}\t{}\n'.format(count, "\t".join(levels)))
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
35
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
36
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
37
d14b68e9fd1d Uploaded - new tools added
petr-novak
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: read the input in the selected format and
    # write the classification counts as a Krona-style tab-delimited table.
    parser = argparse.ArgumentParser()
    # All three options are required; without them the script previously
    # failed later with NameError/TypeError/AttributeError instead of a
    # usage message.
    parser.add_argument('-f', '--format', choices=['dante', 'profrep', 're'],
                        required=True)
    parser.add_argument('-i', '--input', type=argparse.FileType('r'),
                        required=True)
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        required=True)

    args = parser.parse_args()

    # The comma between the two names matters: "profrep" 're' is implicit
    # string concatenation and yields the single string "profrepre".
    if args.format in ["profrep", "re"]:
        print("Not implemented")
        # SystemExit is used instead of exit(), which is only a helper
        # injected by the site module; the exit status stays 0 as before.
        raise SystemExit(0)

    # Only 'dante' remains at this point (enforced by choices= above).
    with args.input:
        classification = parse_dante_gff(args.input)

    with args.output:
        export_classification(classification, args.output)