annotate customizemetadata.py @ 11:0270541aaf46 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit d9038dd0880b963d36aea01eb4594bc30dc28b1c
author iuc
date Thu, 20 Apr 2023 11:26:31 +0000
parents 27250f92a01a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
2 # -*- coding: utf-8 -*-
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
3
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
4 import argparse
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
5 import bz2
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
6 import json
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
7 import pickle
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
8 import re
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
9 from pathlib import Path
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
10
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
11
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def load_from_json(json_fp):
    '''
    Load marker metadata from a JSON file

    JSON has no set or tuple type, so after parsing the 'ext' entry of
    every marker is turned into a set and every taxonomy value that was
    parsed as a list is turned into a tuple.

    :param json_fp: Path to JSON file
    :return: Dictionary with the parsed metadata
    '''
    with open(json_fp, 'r') as handle:
        metadata = json.load(handle)

    # marker 'ext' values come back from JSON as lists
    for marker in metadata['markers'].values():
        marker['ext'] = set(marker['ext'])

    # only list values are converted, anything else is kept as parsed
    taxonomy = metadata['taxonomy']
    for taxon, value in taxonomy.items():
        if isinstance(value, list):
            taxonomy[taxon] = tuple(value)
    return metadata
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
28
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
29
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def dump_to_json(data, json_fp):
    '''
    Dump marker metadata to JSON file

    Sets (such as the 'ext' entry of each marker) cannot be represented in
    JSON, so they are written as lists. The conversion is done while
    encoding: ``data`` itself is left untouched, so the caller can keep
    using its sets after the dump.

    :param data: Dictionary with the metadata to write
    :param json_fp: Path to JSON file
    :raises TypeError: if ``data`` holds a value that is neither JSON
        serializable nor a set
    '''
    def encode_set(obj):
        # json only calls this hook for objects it cannot serialize itself
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        raise TypeError(
            'Object of type %s is not JSON serializable' % type(obj).__name__)

    with open(json_fp, 'w') as json_f:
        json.dump(data, json_f, default=encode_set)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
41
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
42
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def transform_pkl_to_json(pkl_fp, json_fp):
    '''
    Read Pickle file and drop it to a JSON file

    The keys of the 'merged_taxon' dictionary are tuples, which JSON cannot
    use as object keys, so each key is written as its items joined with
    ' , '. transform_json_to_pkl reverses this encoding.

    :param pkl_fp: Path to input (bz2-compressed) Pickle file
    :param json_fp: Path to output JSON file
    '''
    # load metadata from Pickle file
    # NOTE(review): unpickling can execute arbitrary code; only use this on
    # database files coming from a trusted source
    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:
        in_metadata = pickle.load(pkl_f)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons tuple keys to string; the entries must live
        # under 'merged_taxon' (not at the top level) because that is where
        # transform_json_to_pkl looks for them
        'merged_taxon': {
            ' , '.join(k): v for k, v in in_metadata['merged_taxon'].items()
        },
    }

    # dump metadata to JSON file
    dump_to_json(out_metadata, json_fp)


def transform_json_to_pkl(json_fp, pkl_fp):
    '''
    Read JSON file and drop it to a Pickle file

    String keys of the 'merged_taxon' dictionary are split on ' , ' and
    turned back into tuples, reversing the encoding applied by
    transform_pkl_to_json.

    :param json_fp: Path to input JSON file
    :param pkl_fp: Path to output (bz2-compressed) Pickle file
    '''
    # load metadata from JSON file
    in_metadata = load_from_json(json_fp)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons keys to tuple: split the key on the
        # separator (not the separator on the key) and make it hashable
        'merged_taxon': {
            tuple(k.split(' , ')): v
            for k, v in in_metadata['merged_taxon'].items()
        },
    }

    # Ensure that there are 8 taxonomy levels (for compatibility between Metaphlan v3 and v4)
    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
    taxonomy = out_metadata['taxonomy']
    for taxon, entry in taxonomy.items():
        if entry[0].count('|') == 6:
            # only existing keys are reassigned, so the dictionary size does
            # not change while it is being iterated
            taxonomy[taxon] = (entry[0] + '|', entry[1])

    # dump metadata to Pickle file
    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
        pickle.dump(out_metadata, pkl_f)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
99
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
100
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
101 def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name):
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
102 '''
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
103 Add marker to JSON file
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
104
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
105 :param in_json_fp: Path to input JSON file
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
106 :param out_json_fp: Path to output JSON file
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
107 :param name: Name of new marker
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
108 :param m_length: Length of new marker
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
109 :param g_length: List with lengths of genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
110 :param gca: List with GCA of genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
111 :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
112 :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
113 :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
114 :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
115 :param c_name: List with Name of Class for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
116 :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
117 :param o_name: List with Name of Order for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
118 :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
119 :param f_name: List with Name of Family for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
120 :param f_id: List with NCBI id of Family for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
121 :param g_name: List with Name of Genus for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
122 :param g_id: List with NCBI id of Genus for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
123 :param s_name: List with Name of Species for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
124 :param s_id: List with NCBI id of Species for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
125 :param t_name: List with Name of Strain for genomes from which the new marker has been extracted
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
126 '''
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
127 metadata = load_from_json(in_json_fp)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
128
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
129 # check that all lists have same size
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
130 genome_n = len(g_length)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
131 if len(gca) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
132 raise ValueError("Missing/Extra values in GCA list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
133 if len(k_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
134 raise ValueError("Missing/Extra values in Kingdom name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
135 if len(k_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
136 raise ValueError("Missing/Extra values in Kingdom ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
137 if len(p_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
138 raise ValueError("Missing/Extra values in Phylum name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
139 if len(p_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
140 raise ValueError("Missing/Extra values in Phylum ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
141 if len(c_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
142 raise ValueError("Missing/Extra values in Class name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
143 if len(c_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
144 raise ValueError("Missing/Extra values in Class ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
145 if len(o_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
146 raise ValueError("Missing/Extra values in Order name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
147 if len(o_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
148 raise ValueError("Missing/Extra values in Order ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
149 if len(f_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
150 raise ValueError("Missing/Extra values in Family name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
151 if len(f_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
152 raise ValueError("Missing/Extra values in Family ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
153 if len(g_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
154 raise ValueError("Missing/Extra values in Genus name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
155 if len(g_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
156 raise ValueError("Missing/Extra values in Genus ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
157 if len(s_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
158 raise ValueError("Missing/Extra values in Species name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
159 if len(s_id) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
160 raise ValueError("Missing/Extra values in Species ID list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
161 if len(t_name) != genome_n:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
162 raise ValueError("Missing/Extra values in Strain name list")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
163
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
164 # create dictionary to aggregate genome taxonomies and identify marker taxonomy
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
165 taxonomy = {
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
166 'k': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
167 'p': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
168 'c': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
169 'o': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
170 'f': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
171 'g': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
172 's': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
173 't': set(),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
174 }
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
175
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
176 # parse genomes
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
177 for i in range(genome_n):
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
178 # add taxonomy of new genome
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
179 g_taxo_names = "k__%s|p__%s|c__%s|o__%s|f__%s|g__%s|s__%s|t__%s" % (
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
180 k_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
181 p_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
182 c_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
183 o_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
184 f_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
185 g_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
186 s_name[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
187 t_name[i]
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
188 )
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
189 g_taxo_ids = "%s|%s|%s|%s|%s|%s|%s" % (
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
190 k_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
191 p_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
192 c_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
193 o_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
194 f_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
195 g_id[i],
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
196 s_id[i]
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
197 )
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
198 metadata['taxonomy'][g_taxo_names] = (g_taxo_ids, g_length[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
199 # aggregate taxon levels using sets
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
200 taxonomy['k'].add(k_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
201 taxonomy['p'].add(p_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
202 taxonomy['c'].add(c_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
203 taxonomy['o'].add(o_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
204 taxonomy['f'].add(f_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
205 taxonomy['g'].add(g_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
206 taxonomy['s'].add(s_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
207 taxonomy['t'].add(t_name[i])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
208
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
209 # extract clade and taxon of marker
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
210 clade = '' # last level before taxomy of genomes diverge
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
211 taxon = '' # combination of levels before divergence
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
212 for level in ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
213 taxo = list(taxonomy[level])
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
214 if len(taxo) == 1:
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
215 clade = taxo[0]
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
216 taxon = "%s|%s__%s" % (taxon, level, taxo)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
217
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
218 # add information about the new marker
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
219 metadata['markers'][name] = {
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
220 'clade': clade,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
221 'ext': set(gca),
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
222 'len': m_length,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
223 'taxon': taxon
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
224 }
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
225
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
226 dump_to_json(metadata, out_json_fp)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
227
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
228
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def format_markers(marker_l):
    '''
    Normalise raw marker entries into bare marker names

    Trailing whitespace (including the end-of-line character) is removed
    from every entry, then only the text located before the first space
    is kept. Entries without any space are kept whole.

    :param marker_l: list of markers (e.g. raw lines read from a file)
    :return: list with one cleaned marker per input entry, in input order
    '''
    # partition(' ')[0] yields the whole string when there is no space,
    # so a single expression covers both the "with" and "without" cases
    return [raw.rstrip().partition(' ')[0] for raw in marker_l]
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
243
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
244
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def get_markers(marker_fp):
    '''
    Read a marker file and return its formatted markers

    :param marker_fp: Path to file with markers (1 per line)
    :return: list of markers as returned by format_markers
    '''
    # read every line of the file (line endings are still attached here)
    with open(marker_fp, 'r') as handle:
        raw_lines = list(handle)

    # clean-up of the raw lines is delegated to format_markers
    return format_markers(raw_lines)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
259
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
260
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def check_not_found_markers(found_markers, original_markers):
    '''
    Report the requested markers that were not found

    Nothing is printed when both collections have the same length.
    Otherwise a 'markers not found:' header is printed, followed by one
    line for each entry of original_markers absent from found_markers
    (in the order of original_markers).

    :param found_markers: list of found markers
    :param original_markers: list of original markers
    '''
    # same length is taken as "everything was found": nothing to report
    if len(found_markers) == len(original_markers):
        return

    # hash the found markers once: each membership test below is then a
    # constant-time lookup instead of a scan of the whole list
    found = frozenset(found_markers)

    print('markers not found:')
    for m in original_markers:
        if m not in found:
            print('- "%s"' % m)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
273
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
274
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def prune_taxonomy(in_taxonomy, taxon_s, gca_s):
    '''
    Prune taxonomy to keep only the entries linked to a kept taxon or GCA id

    An entry of in_taxonomy is kept when either:
    - one of the taxons in taxon_s is a substring of its key, or
    - its key ends with a GCA id (``GCA_<digits>``) that is listed in gca_s

    A summary (number of kept entries, number of taxons / GCA ids that were
    not found) is printed to stdout.

    :param in_taxonomy: dictionary with list of taxonomy (keys are the
        taxonomy strings that are searched)
    :param taxon_s: set of taxons to keep
    :param gca_s: set of GCA ids to keep
    :return: dictionary with the kept entries of in_taxonomy
    '''
    # the pattern is loop-invariant: compile it once
    gca_suffix = re.compile(r'GCA_\d+$')

    out_taxonomy = {}
    kept_taxons = set()
    kept_gca = set()
    for t, v in in_taxonomy.items():
        # check if t match element in list of taxon_s; the search stops at
        # the first match, so only that taxon is recorded as found for t
        matched_taxon = next((t_k for t_k in taxon_s if t_k in t), None)
        kept_taxon = matched_taxon is not None
        if kept_taxon:
            out_taxonomy[t] = v
            kept_taxons.add(matched_taxon)

        # check if a GCA id ends the taxon id
        s = gca_suffix.search(t)
        if s is None:
            continue
        gca = s.group(0)
        # check if GCA in taxon id is in the list GCA to keep
        if gca in gca_s:
            kept_gca.add(gca)
            # entry not already kept through its taxon: keep it for its GCA
            if not kept_taxon:
                out_taxonomy[t] = v

    # every kept entry is a key of out_taxonomy, so its size is the kept count
    print('%s kept taxonomy' % len(out_taxonomy))
    print('%s / %s taxons not found' % (len(taxon_s) - len(kept_taxons), len(taxon_s)))
    print('%s / %s GCA taxons not found' % (len(gca_s) - len(kept_gca), len(gca_s)))
    return out_taxonomy
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
312
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
313
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def remove_markers(in_json_fp, marker_fp, out_json_fp, kept_marker_fp):
    '''
    Remove markers from JSON file

    Markers listed in marker_fp are dropped from the metadata, the taxonomy
    is pruned to what the remaining markers reference, the result is written
    to out_json_fp and the names of the kept markers to kept_marker_fp.
    Progress counts are printed to stdout.

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to remove (1 per line)
    :param out_json_fp: Path to output JSON file
    :param kept_marker_fp: Path to file with kept markers
    '''
    in_metadata = load_from_json(in_json_fp)

    # load markers
    markers_to_remove = set(get_markers(marker_fp))
    print('%s markers to remove' % len(markers_to_remove))

    # keep merged_taxon
    out_metadata = {
        'markers': {},
        'taxonomy': {},
        'merged_taxon': in_metadata['merged_taxon']
    }

    # split markers between kept and removed, collecting what the kept
    # markers reference so that the taxonomy can be pruned accordingly
    removed_markers = []
    kept_markers = []
    taxons_to_keep = set()
    gca_to_keep = set()
    for m, v in in_metadata['markers'].items():
        if m in markers_to_remove:
            removed_markers.append(m)
            continue
        out_metadata['markers'][m] = v
        kept_markers.append(m)
        taxons_to_keep.add(v['taxon'])
        # presumably v['ext'] is a collection of GCA ids - TODO confirm
        gca_to_keep.update(v['ext'])
    print('%s removed markers' % len(removed_markers))

    # check markers that are not found
    check_not_found_markers(removed_markers, markers_to_remove)

    # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep
    out_metadata['taxonomy'] = prune_taxonomy(in_metadata['taxonomy'], taxons_to_keep, gca_to_keep)

    # save to JSON
    dump_to_json(out_metadata, out_json_fp)

    # write list of kept markers (1 per line)
    with open(kept_marker_fp, 'w') as kept_marker_f:
        kept_marker_f.writelines("%s\n" % m for m in kept_markers)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
364
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
365
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
def keep_markers(in_json_fp, marker_fp, out_json_fp):
    '''
    Keep markers from JSON file, others will be removed

    Only the markers listed in marker_fp are copied to the output metadata,
    the taxonomy is pruned to what these markers reference and the result is
    written to out_json_fp. Progress counts are printed to stdout.

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to keep (1 per line)
    :param out_json_fp: Path to output JSON file
    '''
    in_metadata = load_from_json(in_json_fp)

    # load markers
    markers_to_keep = set(get_markers(marker_fp))
    print('%s markers to keep' % len(markers_to_keep))

    # keep merged_taxon
    out_metadata = {
        'markers': {},
        'taxonomy': {},
        'merged_taxon': in_metadata['merged_taxon']
    }

    # copy the requested markers, collecting what they reference so that
    # the taxonomy can be pruned accordingly
    kept_markers = []
    taxons_to_keep = set()
    gca_to_keep = set()
    for m, v in in_metadata['markers'].items():
        if m not in markers_to_keep:
            continue
        out_metadata['markers'][m] = v
        kept_markers.append(m)
        taxons_to_keep.add(v['taxon'])
        # presumably v['ext'] is a collection of GCA ids - TODO confirm
        gca_to_keep.update(v['ext'])
    print('%s kept markers' % len(kept_markers))

    # check markers that are not found
    check_not_found_markers(kept_markers, markers_to_keep)

    # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep
    out_metadata['taxonomy'] = prune_taxonomy(in_metadata['taxonomy'], taxons_to_keep, gca_to_keep)

    # save to JSON
    dump_to_json(out_metadata, out_json_fp)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
407
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
408
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
409 if __name__ == '__main__':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
410 # Read command line
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
411 parser = argparse.ArgumentParser(description='Customize MetaPhlan database')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
412 subparsers = parser.add_subparsers(dest='function')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
413 # transform_pkl_to_json subcommand
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
414 pkl_to_json_parser = subparsers.add_parser('transform_pkl_to_json', help='Transform Pickle to JSON to get marker metadata')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
415 pkl_to_json_parser.add_argument('--pkl', help="Path to input Pickle file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
416 pkl_to_json_parser.add_argument('--json', help="Path to output JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
417 # transform_json_to_pkl subcommand
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
418 json_to_pkl_parser = subparsers.add_parser('transform_json_to_pkl', help='Transform JSON to Pickle to push marker metadata')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
419 json_to_pkl_parser.add_argument('--json', help="Path to input JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
420 json_to_pkl_parser.add_argument('--pkl', help="Path to output Pickle file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
421 # add_marker subcommand
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
422 add_marker_parser = subparsers.add_parser('add_marker', help='Add new marker to JSON file')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
423 add_marker_parser.add_argument('--in_json', help="Path to input JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
424 add_marker_parser.add_argument('--out_json', help="Path to output JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
425 add_marker_parser.add_argument('--name', help="Name of new marker")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
426 add_marker_parser.add_argument('--m_length', help="Length of new marker")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
427 add_marker_parser.add_argument('--g_length', help="Length of genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
428 add_marker_parser.add_argument('--gca', help="GCA of genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
429 add_marker_parser.add_argument('--k_name', help="Name of Kingdom for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
430 add_marker_parser.add_argument('--k_id', help="NCBI id of Kingdom for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
431 add_marker_parser.add_argument('--p_name', help="Name of Phylum for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
432 add_marker_parser.add_argument('--p_id', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
433 add_marker_parser.add_argument('--c_name', help="Name of Class for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
434 add_marker_parser.add_argument('--c_id', help="NCBI id of Class for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
435 add_marker_parser.add_argument('--o_name', help="Name of Order for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
436 add_marker_parser.add_argument('--o_id', help="NCBI id of Order for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
437 add_marker_parser.add_argument('--f_name', help="Name of Family for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
438 add_marker_parser.add_argument('--f_id', help="NCBI id of Family for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
439 add_marker_parser.add_argument('--g_name', help="Name of Genus for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
440 add_marker_parser.add_argument('--g_id', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
441 add_marker_parser.add_argument('--s_name', help="Name of Species for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
442 add_marker_parser.add_argument('--s_id', help="NCBI id of Species for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
443 add_marker_parser.add_argument('--t_name', help="Name of Strain for genome from which the new marker has been extracted", action="append")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
444 # remove_markers subcommand
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
445 remove_markers_parser = subparsers.add_parser('remove_markers', help='Remove markers from JSON file')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
446 remove_markers_parser.add_argument('--in_json', help="Path to input JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
447 remove_markers_parser.add_argument('--markers', help="Path to file with markers to remove (1 per line)")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
448 remove_markers_parser.add_argument('--out_json', help="Path to output JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
449 remove_markers_parser.add_argument('--kept_markers', help="Path to file with kept markers")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
450 # keep_markers subcommand
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
451 keep_markers_parser = subparsers.add_parser('keep_markers', help='Keep markers from JSON file, others will be removed')
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
452 keep_markers_parser.add_argument('--in_json', help="Path to input JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
453 keep_markers_parser.add_argument('--markers', help="Path to file with markers to keep (1 per line)")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
454 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file")
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
455
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
456 args = parser.parse_args()
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
457
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
458 if args.function == 'transform_pkl_to_json':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
459 transform_pkl_to_json(Path(args.pkl), Path(args.json))
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
460 elif args.function == 'transform_json_to_pkl':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
461 transform_json_to_pkl(Path(args.json), Path(args.pkl))
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
462 elif args.function == 'add_marker':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
463 add_marker(
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
464 args.in_json,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
465 args.out_json,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
466 args.name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
467 args.m_length,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
468 args.g_length,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
469 args.gca,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
470 args.k_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
471 args.k_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
472 args.p_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
473 args.p_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
474 args.c_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
475 args.c_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
476 args.o_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
477 args.o_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
478 args.f_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
479 args.f_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
480 args.g_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
481 args.g_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
482 args.s_name,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
483 args.s_id,
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
484 args.t_name)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
485 elif args.function == 'remove_markers':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
486 remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
487 elif args.function == 'keep_markers':
3f05bf162005 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
iuc
parents:
diff changeset
488 keep_markers(args.in_json, args.markers, args.out_json)