Mercurial > repos > thanhlv > humann_reduce_table
annotate customizemetadata.py @ 0:e152169e5c44 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 16:23:08 +0000 |
parents | |
children |
rev | line source |
---|---|
0
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
3 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
4 import argparse |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
5 import bz2 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
6 import json |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
7 import pickle |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
8 import re |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
9 import sys |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
10 from importlib.metadata import version |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
11 from pathlib import Path |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
12 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
13 from packaging.version import Version |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
14 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
15 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def load_from_json(json_fp):
    '''
    Read marker metadata from a JSON file and restore Python containers

    JSON has no set or tuple type, so after parsing, the 'ext' entry of every
    marker is converted to a set and every taxonomy value that was parsed as
    a list is converted to a tuple.

    :param json_fp: Path to JSON file
    :return: Parsed metadata dictionary with the conversions above applied
    '''
    with open(json_fp, 'r') as handle:
        metadata = json.load(handle)

    # 'ext' comes back from JSON as a list; turn it into a set
    for marker_info in metadata['markers'].values():
        marker_info['ext'] = set(marker_info['ext'])

    # taxonomy values that were serialized as lists become tuples again
    taxonomy = metadata['taxonomy']
    for clade, entry in taxonomy.items():
        if isinstance(entry, list):
            taxonomy[clade] = tuple(entry)

    return metadata
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
32 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
33 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def dump_to_json(data, json_fp):
    '''
    Write marker metadata to a JSON file

    The 'ext' entry of every marker is replaced by a list before writing,
    since sets cannot be serialized to JSON. The replacement is done on the
    passed dictionary itself, so `data` is modified by this call.

    :param data: Metadata dictionary with at least a 'markers' entry
    :param json_fp: Path to JSON file
    '''
    # sets are not JSON serializable: swap each 'ext' for a list (in place)
    for marker_info in data['markers'].values():
        marker_info['ext'] = list(marker_info['ext'])

    with open(json_fp, 'w') as handle:
        json.dump(data, handle)
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
45 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
46 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def transform_pkl_to_json(pkl_fp, json_fp):
    '''
    Read Pickle file and drop it to a JSON file

    The tuple keys of 'merged_taxon' are joined with ' , ' into strings,
    because JSON object keys must be strings. transform_json_to_pkl performs
    the inverse conversion.

    :param pkl_fp: Path to input bz2-compressed Pickle file
    :param json_fp: Path to output JSON file
    '''
    # load metadata from Pickle file
    # NOTE(review): pickle.load can execute arbitrary code; only trusted
    # files should be passed here
    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:
        in_metadata = pickle.load(pkl_f)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons tuple keys to string, keeping the entries
        # under 'merged_taxon' (not at the top level of the metadata)
        'merged_taxon': {
            ' , '.join(k): v for k, v in in_metadata['merged_taxon'].items()
        }
    }

    # dump metadata to JSON file
    dump_to_json(out_metadata, json_fp)


def validate_map_version(infile, file_type):
    '''
    Check that a taxonomy mapping file matches the installed Metaphlan version.

    The first element of every taxonomy entry must contain 7 "|" separators
    when the installed Metaphlan is >= 4, and 6 separators otherwise. On the
    first mismatch a message is printed and the process exits with status 42.

    :param infile: Path to input Pickle/JSON file
    :param file_type: String defining file type, pkl/pickle or JSON. Case-insensitive
    :raises ValueError: If file_type is not one of the supported types
    '''
    file_type = file_type.lower()
    if file_type in ('pkl', 'pickle'):
        # load metadata from Pickle file
        # NOTE(review): pickle.load can execute arbitrary code; only trusted
        # files should be passed here
        with bz2.BZ2File(infile, 'r') as pkl_f:
            in_metadata = pickle.load(pkl_f)
    elif file_type == 'json':
        in_metadata = load_from_json(infile)
    else:
        raise ValueError("Unsupported file type to validate.")

    # Version of the installed 'metaphlan' distribution
    metaphlan_version = Version(version('metaphlan'))

    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
    # so the expected number of "|" depends only on the installed version
    expected_separators = 7 if metaphlan_version >= Version('4') else 6

    for entry in in_metadata['taxonomy'].values():
        if entry[0].count('|') != expected_separators:
            print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version))
            sys.exit(42)

    print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version))


def transform_json_to_pkl(json_fp, pkl_fp):
    '''
    Read JSON file and drop it to a Pickle file

    The string keys of 'merged_taxon' are split on ' , ' and turned back into
    tuples, reversing the conversion done by transform_pkl_to_json.

    :param json_fp: Path to input JSON file
    :param pkl_fp: Path to output bz2-compressed Pickle file
    '''
    # load metadata from JSON file
    in_metadata = load_from_json(json_fp)

    out_metadata = {
        'markers': in_metadata['markers'],
        'taxonomy': in_metadata['taxonomy'],
        # transform merged_taxons keys to tuple; a tuple is required because
        # the list returned by str.split is not hashable
        'merged_taxon': {
            tuple(k.split(' , ')): v for k, v in in_metadata['merged_taxon'].items()
        }
    }

    # dump metadata to Pickle file
    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
        pickle.dump(out_metadata, pkl_f)
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
129 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
130 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
131 def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name): |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
132 ''' |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
133 Add marker to JSON file |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
134 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
135 :param in_json_fp: Path to input JSON file |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
136 :param out_json_fp: Path to output JSON file |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
137 :param name: Name of new marker |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
138 :param m_length: Length of new marker |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
139 :param g_length: List with lengths of genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
140 :param gca: List with GCA of genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
141 :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
142 :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
143 :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
144 :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
145 :param c_name: List with Name of Class for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
146 :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
147 :param o_name: List with Name of Order for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
148 :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
149 :param f_name: List with Name of Family for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
150 :param f_id: List with NCBI id of Family for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
151 :param g_name: List with Name of Genus for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
152 :param g_id: List with NCBI id of Genus for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
153 :param s_name: List with Name of Species for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
154 :param s_id: List with NCBI id of Species for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
155 :param t_name: List with Name of Strain for genomes from which the new marker has been extracted |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
156 ''' |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
157 metadata = load_from_json(in_json_fp) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
158 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
159 # check that all lists have same size |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
160 genome_n = len(g_length) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
161 if len(gca) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
162 raise ValueError("Missing/Extra values in GCA list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
163 if len(k_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
164 raise ValueError("Missing/Extra values in Kingdom name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
165 if len(k_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
166 raise ValueError("Missing/Extra values in Kingdom ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
167 if len(p_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
168 raise ValueError("Missing/Extra values in Phylum name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
169 if len(p_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
170 raise ValueError("Missing/Extra values in Phylum ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
171 if len(c_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
172 raise ValueError("Missing/Extra values in Class name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
173 if len(c_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
174 raise ValueError("Missing/Extra values in Class ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
175 if len(o_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
176 raise ValueError("Missing/Extra values in Order name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
177 if len(o_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
178 raise ValueError("Missing/Extra values in Order ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
179 if len(f_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
180 raise ValueError("Missing/Extra values in Family name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
181 if len(f_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
182 raise ValueError("Missing/Extra values in Family ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
183 if len(g_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
184 raise ValueError("Missing/Extra values in Genus name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
185 if len(g_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
186 raise ValueError("Missing/Extra values in Genus ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
187 if len(s_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
188 raise ValueError("Missing/Extra values in Species name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
189 if len(s_id) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
190 raise ValueError("Missing/Extra values in Species ID list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
191 if len(t_name) != genome_n: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
192 raise ValueError("Missing/Extra values in Strain name list") |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
193 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
194 # create dictionary to aggregate genome taxonomies and identify marker taxonomy |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
195 taxonomy = { |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
196 'k': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
197 'p': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
198 'c': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
199 'o': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
200 'f': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
201 'g': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
202 's': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
203 't': set(), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
204 } |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
205 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
206 # parse genomes |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
207 for i in range(genome_n): |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
208 # add taxonomy of new genome |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
209 g_taxo_names = "k__%s|p__%s|c__%s|o__%s|f__%s|g__%s|s__%s|t__%s" % ( |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
210 k_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
211 p_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
212 c_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
213 o_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
214 f_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
215 g_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
216 s_name[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
217 t_name[i] |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
218 ) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
219 g_taxo_ids = "%s|%s|%s|%s|%s|%s|%s" % ( |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
220 k_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
221 p_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
222 c_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
223 o_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
224 f_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
225 g_id[i], |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
226 s_id[i] |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
227 ) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
228 metadata['taxonomy'][g_taxo_names] = (g_taxo_ids, g_length[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
229 # aggregate taxon levels using sets |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
230 taxonomy['k'].add(k_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
231 taxonomy['p'].add(p_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
232 taxonomy['c'].add(c_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
233 taxonomy['o'].add(o_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
234 taxonomy['f'].add(f_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
235 taxonomy['g'].add(g_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
236 taxonomy['s'].add(s_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
237 taxonomy['t'].add(t_name[i]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
238 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
239 # extract clade and taxon of marker |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
240 clade = '' # last level before taxomy of genomes diverge |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
241 taxon = '' # combination of levels before divergence |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
242 for level in ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
243 taxo = list(taxonomy[level]) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
244 if len(taxo) == 1: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
245 clade = taxo[0] |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
246 taxon = "%s|%s__%s" % (taxon, level, taxo) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
247 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
248 # add information about the new marker |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
249 metadata['markers'][name] = { |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
250 'clade': clade, |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
251 'ext': set(gca), |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
252 'len': m_length, |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
253 'taxon': taxon |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
254 } |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
255 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
256 dump_to_json(metadata, out_json_fp) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
257 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
258 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def format_markers(marker_l):
    '''
    Format markers: keep only the marker identifier of each entry

    Each entry is reduced to its first whitespace-delimited token, so any
    description following the identifier is dropped. Leading and trailing
    whitespace (including the newline) is ignored. Entries that are blank
    after stripping are skipped, because an empty string is not a usable
    marker name.

    :param marker_l: list of markers (e.g. raw lines read from a marker file)
    :return: list of marker identifiers, in the same order as the input
    '''
    markers = []
    for m in marker_l:
        # split() without arguments ignores surrounding whitespace and treats
        # any run of spaces/tabs as a single separator, so a line starting
        # with a space no longer produces an empty identifier
        fields = m.split()
        if fields:
            markers.append(fields[0])
    return markers
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
273 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
274 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def get_markers(marker_fp):
    '''
    Read a marker file and return its formatted markers

    :param marker_fp: Path to file with markers (1 per line)
    :return: result of format_markers applied to the list of raw lines
    '''
    # read every raw line (trailing newlines included) while the file is open
    with open(marker_fp, 'r') as handle:
        raw_lines = list(handle)

    # clean-up of the raw lines is delegated to format_markers
    return format_markers(raw_lines)
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
289 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
290 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def check_not_found_markers(found_markers, original_markers):
    '''
    Report the requested markers that were not found

    Prints a "markers not found:" header followed by one line per marker of
    original_markers that is absent from found_markers. Nothing is printed
    when every requested marker was found.

    :param found_markers: iterable of found markers (hashable values)
    :param original_markers: iterable of original (requested) markers
    '''
    # hash-based lookup: a single pass instead of scanning a list per marker
    found = set(found_markers)
    # compare contents rather than lengths, so duplicates or equally sized
    # but different collections can neither hide nor fake a missing marker
    missing = [m for m in original_markers if m not in found]
    if missing:
        print('markers not found:')
        for m in missing:
            print('- "%s"' % m)
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
303 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
304 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def prune_taxonomy(in_taxonomy, taxon_s, gca_s):
    '''
    Prune taxonomy to keep only listed taxonomy

    An entry of in_taxonomy is kept when at least one element of taxon_s is
    a substring of its key, or when its key ends with a "GCA_<digits>" id
    that belongs to gca_s. Summary counts are printed to stdout.

    :param in_taxonomy: dictionary with list of taxonomy
    :param taxon_s: set of taxons to keep
    :param gca_s: set of GCA ids to keep
    :return: dictionary with the kept entries of in_taxonomy
    '''
    # compiled once instead of on every iteration
    gca_suffix = re.compile(r'GCA_\d+$')
    out_taxonomy = {}
    kept_taxons = set()
    kept_gca = set()
    for t, v in in_taxonomy.items():
        # record every taxon matching this key (not only the first one), so
        # the "taxons not found" count below is exact and does not depend on
        # the iteration order of taxon_s
        matched = [t_k for t_k in taxon_s if t_k in t]
        if matched:
            out_taxonomy[t] = v
            kept_taxons.update(matched)
        # check if the key ends with a GCA id that is in the list to keep
        s = gca_suffix.search(t)
        if s and s.group(0) in gca_s:
            kept_gca.add(s.group(0))
            # no-op when the entry was already kept through a taxon match
            out_taxonomy[t] = v

    print('%s kept taxonomy' % len(out_taxonomy))
    print('%s / %s taxons not found' % (len(taxon_s) - len(kept_taxons), len(taxon_s)))
    print('%s / %s GCA taxons not found' % (len(gca_s) - len(kept_gca), len(gca_s)))
    return out_taxonomy
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
342 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
343 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
def remove_markers(in_json_fp, marker_fp, out_json_fp, kept_marker_fp):
    '''
    Write a copy of the JSON metadata without the listed markers

    The markers absent from the list are copied to the output, the taxonomy
    is pruned with the 'taxon' and 'ext' values of those kept markers, and
    the names of the kept markers are written one per line.

    :param in_json_fp: Path to input JSON file
    :param marker_fp: Path to file with markers to remove (1 per line)
    :param out_json_fp: Path to output JSON file
    :param kept_marker_fp: Path to file with kept markers
    '''
    in_metadata = load_from_json(in_json_fp)

    # names listed in the marker file are the ones to drop
    markers_to_remove = set(get_markers(marker_fp))
    print('%s markers to remove' % len(markers_to_remove))

    # merged_taxon is carried over unchanged
    out_metadata = {
        'markers': {},
        'taxonomy': {},
        'merged_taxon': in_metadata['merged_taxon']
    }

    # split the input markers into dropped and surviving ones
    dropped = []
    survivors = []
    taxons_to_keep = set()
    gca_to_keep = set()
    for name, info in in_metadata['markers'].items():
        if name in markers_to_remove:
            dropped.append(name)
            continue
        out_metadata['markers'][name] = info
        survivors.append(name)
        # remember what the surviving marker refers to for taxonomy pruning
        taxons_to_keep.add(info['taxon'])
        gca_to_keep.update(info['ext'])
    print('%s removed markers' % len(dropped))

    # report requested markers that were not present in the input
    check_not_found_markers(dropped, markers_to_remove)

    # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep
    out_metadata['taxonomy'] = prune_taxonomy(
        in_metadata['taxonomy'], taxons_to_keep, gca_to_keep)

    # save to JSON
    dump_to_json(out_metadata, out_json_fp)

    # write list of kept markers, one per line
    with open(kept_marker_fp, 'w') as kept_marker_f:
        kept_marker_f.writelines("%s\n" % name for name in survivors)
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
394 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
395 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
396 def keep_markers(in_json_fp, marker_fp, out_json_fp): |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
397 ''' |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
398 Keep markers from JSON file, others will be removed |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
399 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
400 :param in_json_fp: Path to input JSON file |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
401 :param marker_fp: Path to file with markers to keep (1 per line) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
402 :param out_json_fp: Path to output JSON file |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
403 ''' |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
404 in_metadata = load_from_json(in_json_fp) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
405 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
406 # load markers |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
407 markers_to_keep = set(get_markers(marker_fp)) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
408 print('%s markers to keep' % len(markers_to_keep)) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
409 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
410 # keep merged_taxon |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
411 out_metadata = { |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
412 'markers': {}, |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
413 'taxonomy': {}, |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
414 'merged_taxon': in_metadata['merged_taxon'] |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
415 } |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
416 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
417 # parse markers to keep |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
418 kept_markers = [] |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
419 taxons_to_keep = set() |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
420 gca_to_keep = set() |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
421 for m, v in in_metadata['markers'].items(): |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
422 if m in markers_to_keep: |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
423 out_metadata['markers'][m] = v |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
424 kept_markers.append(m) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
425 taxons_to_keep.add(v['taxon']) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
426 gca_to_keep.update(v['ext']) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
427 print('%s kept markers' % len(kept_markers)) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
428 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
429 # check markers that are not found |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
430 check_not_found_markers(kept_markers, markers_to_keep) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
431 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
432 # keep only taxonomy in taxons_to_keep or with GCA in gca_to_keep |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
433 out_metadata['taxonomy'] = prune_taxonomy(in_metadata['taxonomy'], taxons_to_keep, gca_to_keep) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
434 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
435 # save to JSON |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
436 dump_to_json(out_metadata, out_json_fp) |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
437 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
438 |
e152169e5c44
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
thanhlv
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: one sub-command per metadata operation.
    # The chosen sub-command name is stored in ``args.function``.
    parser = argparse.ArgumentParser(description='Customize MetaPhlan database')
    subparsers = parser.add_subparsers(dest='function')

    # transform_pkl_to_json subcommand
    pkl_to_json_parser = subparsers.add_parser('transform_pkl_to_json', help='Transform Pickle to JSON to get marker metadata')
    pkl_to_json_parser.add_argument('--pkl', help="Path to input Pickle file")
    pkl_to_json_parser.add_argument('--json', help="Path to output JSON file")

    # transform_json_to_pkl subcommand
    json_to_pkl_parser = subparsers.add_parser('transform_json_to_pkl', help='Transform JSON to Pickle to push marker metadata')
    json_to_pkl_parser.add_argument('--json', help="Path to input JSON file")
    json_to_pkl_parser.add_argument('--pkl', help="Path to output Pickle file")

    # add_marker subcommand
    # Every option from --g_length onwards uses action="append", so each may
    # be repeated on the command line and is collected into a list.
    add_marker_parser = subparsers.add_parser('add_marker', help='Add new marker to JSON file')
    add_marker_parser.add_argument('--in_json', help="Path to input JSON file")
    add_marker_parser.add_argument('--out_json', help="Path to output JSON file")
    add_marker_parser.add_argument('--name', help="Name of new marker")
    add_marker_parser.add_argument('--m_length', help="Length of new marker")
    add_marker_parser.add_argument('--g_length', help="Length of genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--gca', help="GCA of genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--k_name', help="Name of Kingdom for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--k_id', help="NCBI id of Kingdom for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--p_name', help="Name of Phylum for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--p_id', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--c_name', help="Name of Class for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--c_id', help="NCBI id of Class for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--o_name', help="Name of Order for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--o_id', help="NCBI id of Order for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--f_name', help="Name of Family for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--f_id', help="NCBI id of Family for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--g_name', help="Name of Genus for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--g_id', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--s_name', help="Name of Species for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--s_id', help="NCBI id of Species for genome from which the new marker has been extracted", action="append")
    add_marker_parser.add_argument('--t_name', help="Name of Strain for genome from which the new marker has been extracted", action="append")

    # remove_markers subcommand
    remove_markers_parser = subparsers.add_parser('remove_markers', help='Remove markers from JSON file')
    remove_markers_parser.add_argument('--in_json', help="Path to input JSON file")
    remove_markers_parser.add_argument('--markers', help="Path to file with markers to remove (1 per line)")
    remove_markers_parser.add_argument('--out_json', help="Path to output JSON file")
    remove_markers_parser.add_argument('--kept_markers', help="Path to file with kept markers")

    # keep_markers subcommand
    keep_markers_parser = subparsers.add_parser('keep_markers', help='Keep markers from JSON file, others will be removed')
    keep_markers_parser.add_argument('--in_json', help="Path to input JSON file")
    keep_markers_parser.add_argument('--markers', help="Path to file with markers to keep (1 per line)")
    keep_markers_parser.add_argument('--out_json', help="Path to output JSON file")

    args = parser.parse_args()

    # On Python 3 sub-commands are optional unless explicitly required, so a
    # bare invocation leaves ``args.function`` as None. Without this check the
    # dispatch below would match nothing and the script would exit with
    # status 0 having done no work. ``parser.error`` prints the usage message
    # and exits with status 2.
    if args.function is None:
        parser.error('a subcommand is required (use --help to list them)')

    # Dispatch on the selected sub-command. Only the two transform commands
    # wrap their paths in ``Path``; the others receive the raw strings.
    if args.function == 'transform_pkl_to_json':
        validate_map_version(Path(args.pkl), 'pkl')
        transform_pkl_to_json(Path(args.pkl), Path(args.json))
    elif args.function == 'transform_json_to_pkl':
        validate_map_version(Path(args.json), 'json')
        transform_json_to_pkl(Path(args.json), Path(args.pkl))
    elif args.function == 'add_marker':
        add_marker(
            args.in_json,
            args.out_json,
            args.name,
            args.m_length,
            args.g_length,
            args.gca,
            args.k_name,
            args.k_id,
            args.p_name,
            args.p_id,
            args.c_name,
            args.c_id,
            args.o_name,
            args.o_id,
            args.f_name,
            args.f_id,
            args.g_name,
            args.g_id,
            args.s_name,
            args.s_id,
            args.t_name)
    elif args.function == 'remove_markers':
        remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)
    elif args.function == 'keep_markers':
        keep_markers(args.in_json, args.markers, args.out_json)