comparison customizemetadata.py @ 2:080ea153677c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
author iuc
date Tue, 07 Feb 2023 19:48:42 +0000
parents c3d043160f09
children
comparison
equal deleted inserted replaced
1:4033827c76b8 2:080ea153677c
4 import argparse 4 import argparse
5 import bz2 5 import bz2
6 import json 6 import json
7 import pickle 7 import pickle
8 import re 8 import re
9 import sys
10 from importlib.metadata import version
9 from pathlib import Path 11 from pathlib import Path
12
13 from packaging.version import Version
10 14
11 15
12 def load_from_json(json_fp): 16 def load_from_json(json_fp):
13 ''' 17 '''
14 Read JSON file with marker metadata 18 Read JSON file with marker metadata
54 out_metadata = { 58 out_metadata = {
55 'markers': in_metadata['markers'], 59 'markers': in_metadata['markers'],
56 'taxonomy': in_metadata['taxonomy'], 60 'taxonomy': in_metadata['taxonomy'],
57 'merged_taxon': {} 61 'merged_taxon': {}
58 } 62 }
63
59 # transform merged_taxons tuple keys to string 64 # transform merged_taxons tuple keys to string
60 for k in in_metadata['merged_taxon']: 65 for k in in_metadata['merged_taxon']:
61 n = ' , '.join(k) 66 n = ' , '.join(k)
62 out_metadata[n] = in_metadata['merged_taxon'][k] 67 out_metadata[n] = in_metadata['merged_taxon'][k]
63 68
64 # dump metadata to JSON file 69 # dump metadata to JSON file
65 dump_to_json(out_metadata, json_fp) 70 dump_to_json(out_metadata, json_fp)
71
72
73 def validate_map_version(infile, file_type):
74 '''
75 Check that a user-provided Pickle/JSON taxonomy mapping file is compatible with the detected Metaphlan version (v3 or >= v4).
76
77 :param infile: Path to input Pickle/JSON file
78 :param file_type: String defining file type, pkl or JSON. Case-insensitive
79 '''
80 file_type = file_type.lower()
81 if file_type == 'pkl' or file_type == 'pickle':
82 # load metadata from Pickle file
83 with bz2.BZ2File(infile, 'r') as pkl_f:
84 in_metadata = pickle.load(pkl_f)
85 elif file_type == 'json':
86 in_metadata = load_from_json(infile)
87 else:
88 raise ValueError("Unsupported file type to validate.")
89
90 # Get metaphlan version in $PATH
91 metaphlan_version = Version(version('metaphlan'))
92
93 # Ensure each taxid string has the number of "|" separators expected for the Metaphlan version: 7 for >= v4, 6 for v3.
94 # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
95 # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
96 for k in in_metadata['taxonomy']:
97 if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')):
98 # raise ValueError("Missing/Extra values in GCA list")
99 print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version))
100 sys.exit(42)
101
102 print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version))
66 103
67 104
68 def transform_json_to_pkl(json_fp, pkl_fp): 105 def transform_json_to_pkl(json_fp, pkl_fp):
69 ''' 106 '''
70 Read JSON file and drop it to a Pickle file 107 Read JSON file and drop it to a Pickle file
78 out_metadata = { 115 out_metadata = {
79 'markers': in_metadata['markers'], 116 'markers': in_metadata['markers'],
80 'taxonomy': in_metadata['taxonomy'], 117 'taxonomy': in_metadata['taxonomy'],
81 'merged_taxon': {} 118 'merged_taxon': {}
82 } 119 }
120
83 # transform merged_taxons keys to tuple 121 # transform merged_taxons keys to tuple
84 for k in in_metadata['merged_taxon']: 122 for k in in_metadata['merged_taxon']:
85 n = ' , '.split(k) 123 n = ' , '.split(k)
86 out_metadata[n] = in_metadata['merged_taxon'][k] 124 out_metadata[n] = in_metadata['merged_taxon'][k]
87 125
446 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file") 484 keep_markers_parser.add_argument('--out_json', help="Path to output JSON file")
447 485
448 args = parser.parse_args() 486 args = parser.parse_args()
449 487
450 if args.function == 'transform_pkl_to_json': 488 if args.function == 'transform_pkl_to_json':
489 validate_map_version(Path(args.pkl), 'pkl')
451 transform_pkl_to_json(Path(args.pkl), Path(args.json)) 490 transform_pkl_to_json(Path(args.pkl), Path(args.json))
452 elif args.function == 'transform_json_to_pkl': 491 elif args.function == 'transform_json_to_pkl':
492 validate_map_version(Path(args.json), 'json')
453 transform_json_to_pkl(Path(args.json), Path(args.pkl)) 493 transform_json_to_pkl(Path(args.json), Path(args.pkl))
454 elif args.function == 'add_marker': 494 elif args.function == 'add_marker':
455 add_marker( 495 add_marker(
456 args.in_json, 496 args.in_json,
457 args.out_json, 497 args.out_json,