Mercurial > repos > petr-novak > re_utils
comparison cluster_table2krona_format.py @ 17:d14b68e9fd1d draft
Uploaded - new tools added
author | petr-novak |
---|---|
date | Wed, 28 Apr 2021 08:37:20 +0000 |
parents | |
children | 2f1b5d5c5dd5 |
comparison
equal
deleted
inserted
replaced
16:5376e1c9adec | 17:d14b68e9fd1d |
---|---|
#!/usr/bin/env python
"""Summarise a CLUSTER_table.csv file as tab-separated text for Krona.

The input is expected to start with ``name value`` summary lines (among them
``Number_of_reads_in_clusters`` and ``Number_of_singlets``), followed by a
header line naming the cluster table columns and then one row per cluster.

Each output line is ``<read count><TAB><classification levels...>``.
"""
import sys
import re
from collections import defaultdict
import argparse

# Matches the header line that separates the summary lines from the
# per-cluster rows.
HEADER_PATTERN = re.compile(
    '.*Cluster.+Supercluster.+Size.+Size_adjusted.+Automatic_annotation.+TAREAN_annotation.+Final_annotation'
)

# Zero-based field positions after whitespace splitting, following the column
# order named in the header line.
# NOTE(review): rows are split on any whitespace, so these positions hold only
# if no field contains spaces and no field is empty - confirm against real
# CLUSTER_table.csv files (an empty Final_annotation would shorten the row).
SIZE_ADJUSTED_COLUMN = 3
AUTOMATIC_ANNOTATION_COLUMN = 4
FINAL_ANNOTATION_COLUMN = 6


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    # Both files are mandatory: without them there is nothing to read or write.
    parser.add_argument("-i", "--input", type=argparse.FileType('r'), required=True,
                        help="path to file CLUSTER_table.csv")
    parser.add_argument("-o", "--output", type=argparse.FileType('w'), required=True,
                        help="output file name")
    parser.add_argument("-m", "--use_manual", action='store_true', default=False,
                        help="classify by column 6 (Final_annotation) instead of "
                             "column 4 (Automatic_annotation)")
    return parser


def summarize_cluster_table(lines, column):
    """Aggregate read counts per classification from cluster table lines.

    Args:
        lines: iterable of text lines (an open file works).
        column: zero-based index of the whitespace-separated field holding the
            "/"-separated classification of a cluster row.

    Returns:
        dict mapping a tab-joined classification path to a read count, in
        first-seen order, followed by the ``Singlets`` and ``Small_cluster``
        totals.

    Raises:
        KeyError: if ``Number_of_singlets`` or ``Number_of_reads_in_clusters``
            was not found before the header line.
        IndexError: if a non-blank cluster row has too few fields.
        ValueError: if a count field is not an integer.
    """
    header_seen = False
    clust_info = {}
    counts = defaultdict(int)
    top_clusters = 0

    for line in lines:
        if HEADER_PATTERN.match(line):
            print("header detected")
            header_seen = True
            continue

        parts = line.split()
        if not header_seen:
            # Summary section: '"name" value' pairs; anything else is ignored.
            if len(parts) >= 2:
                clust_info[parts[0].replace('"', '')] = int(parts[1])
            continue

        if not parts:
            # Blank lines inside or after the table carry no data.
            continue

        # The first path component of the annotation is dropped and replaced
        # by the fixed "Top_clusters" level.
        levels = parts[column].split("/")[1:]
        classification = "Top_clusters\t" + "\t".join(levels).replace('"', '')
        size = int(parts[SIZE_ADJUSTED_COLUMN])
        counts[classification] += size
        top_clusters += size

    counts['Singlets'] = clust_info['Number_of_singlets']
    # Reads that are in clusters but not in any cluster listed in the table.
    counts['Small_cluster'] = int(clust_info['Number_of_reads_in_clusters']) - top_clusters
    return dict(counts)


def format_krona_lines(counts):
    """Return one ``count<TAB>classification`` line per entry of *counts*."""
    return [str(nreads) + "\t" + cls_line + "\n" for cls_line, nreads in counts.items()]


def main(argv=None):
    """Parse the command line, read the cluster table and write the summary."""
    args = build_parser().parse_args(argv)
    column = FINAL_ANNOTATION_COLUMN if args.use_manual else AUTOMATIC_ANNOTATION_COLUMN

    # argparse.FileType has already opened both files; use those handles
    # directly (so "-" for stdin/stdout works) and close them when done.
    with args.input as table:
        counts = summarize_cluster_table(table, column)
    with args.output as fout:
        fout.writelines(format_krona_lines(counts))


if __name__ == "__main__":
    main()