Previous changeset 1:4137b492233e (2021-05-19) Next changeset 3:a4d7148ccee4 (2023-03-29) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd |
modified:
customizemetadata.py macros.xml test-data/barplot2.pdf test-data/metaphlan_database.loc test-data/test-db/metaphlan-db/demo-db-v30.json test-data/test-db/metaphlan-db/demo-db-v30.pkl |
added:
test-data/test-db/metaphlan-db/customizemapping.py test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l test-data/test-db/metaphlan-db/old-structure/demo-db-v30.1.bt2 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.2.bt2 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.3.bt2 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.4.bt2 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl test-data/test-db/metaphlan-db/old-structure/demo-db-v30.rev.1.bt2 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.rev.2.bt2 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz |
removed:
customizemapping.py test-data/test-db/metaphlan-db/demo-db-v30.1.bt2 test-data/test-db/metaphlan-db/demo-db-v30.2.bt2 test-data/test-db/metaphlan-db/demo-db-v30.3.bt2 test-data/test-db/metaphlan-db/demo-db-v30.4.bt2 test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2 test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz transform_json_to_pkl.py |
b |
diff -r 4137b492233e -r d7e9a3c41657 customizemapping.py --- a/customizemapping.py Wed May 19 17:02:37 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,47 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -from pathlib import Path - - -if __name__ == '__main__': - # Read command line - parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping') - parser.add_argument('--in_mapping', help="Path to mapping file to reduce") - parser.add_argument('--features', help="Path to tabular file with features to keep in first column") - parser.add_argument('--elements', help="Path to tabular file with elements to keep in other columns") - parser.add_argument('--out_mapping', help="Path to reduced mapping file") - args = parser.parse_args() - - in_mapping_fp = Path(args.in_mapping) - feature_fp = Path(args.features) - element_fp = Path(args.elements) - out_mapping_fp = Path(args.out_mapping) - - # extract features to keep - features = set() - with open(feature_fp, 'r') as feature_f: - for line in feature_f.readlines(): - features.add(line.split("\t")[0]) - print(features) - - # extract elements to keep - elements = set() - with open(element_fp, 'r') as element_f: - for line in element_f.readlines(): - elements.add(line.split("\t")[0]) - print(elements) - - # write mapping for features to keep while keeping only elements - with open(in_mapping_fp, 'r') as in_mapping_f: - with open(out_mapping_fp, 'w') as out_mapping_f: - for line in in_mapping_f.readlines(): - l_split = line.split("\t") - feat = l_split[0] - if feat in features: - to_write = [feat] - for e in l_split[1:]: - if e in elements: - to_write.append(e) - out_mapping_f.write("%s\n" % '\t'.join(to_write)) |
b |
diff -r 4137b492233e -r d7e9a3c41657 customizemetadata.py --- a/customizemetadata.py Wed May 19 17:02:37 2021 +0000 +++ b/customizemetadata.py Tue Feb 07 19:46:09 2023 +0000 |
[ |
@@ -6,8 +6,12 @@ import json import pickle import re +import sys +from importlib.metadata import version from pathlib import Path +from packaging.version import Version + def load_from_json(json_fp): ''' @@ -56,6 +60,7 @@ 'taxonomy': in_metadata['taxonomy'], 'merged_taxon': {} } + # transform merged_taxons tuple keys to string for k in in_metadata['merged_taxon']: n = ' , '.join(k) @@ -65,6 +70,38 @@ dump_to_json(out_metadata, json_fp) +def validate_map_version(infile, file_type): + ''' + Check conformity of a user-provided pkl file to Metaphlan SGB (>= v4.0). + + :param infile: Path to input Pickle/JSON file + :param file_type: String definining file type, pkl or JSON. Case-insensitive + ''' + file_type = file_type.lower() + if file_type == 'pkl' or file_type == 'pickle': + # load metadata from Pickle file + with bz2.BZ2File(infile, 'r') as pkl_f: + in_metadata = pickle.load(pkl_f) + elif file_type == 'json': + in_metadata = load_from_json(infile) + else: + raise ValueError("Unsupported file type to validate.") + + # Get metaphlan version in $PATH + metaphlan_version = Version(version('metaphlan')) + + # Ensure that there are 8 taxonomy levels separated with "|"s. + # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432) + # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432) + for k in in_metadata['taxonomy']: + if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')): + # raise ValueError("Missing/Extra values in GCA list") + print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version)) + sys.exit(42) + + print("%s is compatible with Metaphlan v.%s." 
% (infile, metaphlan_version)) + + def transform_json_to_pkl(json_fp, pkl_fp): ''' Read JSON file and drop it to a Pickle file @@ -80,6 +117,7 @@ 'taxonomy': in_metadata['taxonomy'], 'merged_taxon': {} } + # transform merged_taxons keys to tuple for k in in_metadata['merged_taxon']: n = ' , '.split(k) @@ -448,8 +486,10 @@ args = parser.parse_args() if args.function == 'transform_pkl_to_json': + validate_map_version(Path(args.pkl), 'pkl') transform_pkl_to_json(Path(args.pkl), Path(args.json)) elif args.function == 'transform_json_to_pkl': + validate_map_version(Path(args.json), 'json') transform_json_to_pkl(Path(args.json), Path(args.pkl)) elif args.function == 'add_marker': add_marker( |
b |
diff -r 4137b492233e -r d7e9a3c41657 macros.xml --- a/macros.xml Wed May 19 17:02:37 2021 +0000 +++ b/macros.xml Tue Feb 07 19:46:09 2023 +0000 |
b |
@@ -1,8 +1,8 @@ <?xml version="1.0"?> <macros> - <token name="@TOOL_VERSION@">3.0.0</token> - <token name="@VERSION_SUFFIX@">1</token> - <token name="@PROFILE@">20.01</token> + <token name="@TOOL_VERSION@">3.6.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">22.01</token> <xml name="edam_ontology"> <edam_topics> <edam_topic>topic_3174</edam_topic> @@ -32,6 +32,7 @@ </token> <xml name="citations"> <citations> + <citation type="doi">10.7554/eLife.65088</citation> <citation type="doi">10.1371/journal.pcbi.1002358</citation> </citations> </xml> |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/barplot2.pdf |
b |
Binary file test-data/barplot2.pdf has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/metaphlan_database.loc --- a/test-data/metaphlan_database.loc Wed May 19 17:02:37 2021 +0000 +++ b/test-data/metaphlan_database.loc Tue Feb 07 19:46:09 2023 +0000 |
b |
@@ -3,4 +3,5 @@ # - db-name # - build # - /path/to/data -metaphlan-demo-db-20210421 MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db \ No newline at end of file +metaphlan-demo-db-20210421 MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db +metaphlan-db-old-structure MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db/old-structure |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/customizemapping.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/metaphlan-db/customizemapping.py Tue Feb 07 19:46:09 2023 +0000 |
[ |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Script to extract a custom mapping file from an input mapping file.
# Mostly used for reduced-size demo data generation.


import argparse
from pathlib import Path


def read_first_column(table_fp):
    '''
    Collect the distinct values found in the first column of a tabular file.

    :param table_fp: Path to a tab-separated file
    :return: set with the non-empty first-column values
    '''
    values = set()
    with open(table_fp, 'r') as table_f:
        # iterate lazily: the file does not need to fit in memory
        for line in table_f:
            # drop the line terminator first, otherwise a single-column
            # line yields "value\n" and never matches the mapping content
            first = line.rstrip('\r\n').split('\t')[0]
            # blank lines would add an empty key matching blank mapping lines
            if first:
                values.add(first)
    return values


def reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements):
    '''
    Write a reduced copy of a mapping file.

    A line is kept when its first column is in ``features``; on kept lines,
    only the other columns found in ``elements`` are written.

    :param in_mapping_fp: Path to mapping file to reduce
    :param out_mapping_fp: Path to reduced mapping file
    :param features: set of features (first column) to keep
    :param elements: set of elements (other columns) to keep
    '''
    with open(in_mapping_fp, 'r') as in_mapping_f, \
            open(out_mapping_fp, 'w') as out_mapping_f:
        for line in in_mapping_f:
            # strip the terminator so the last element of the line is
            # compared (and written) without a trailing newline
            feat, *others = line.rstrip('\r\n').split('\t')
            if feat in features:
                to_write = [feat] + [e for e in others if e in elements]
                out_mapping_f.write("%s\n" % '\t'.join(to_write))


def main(argv=None):
    '''
    Command-line entry point.

    :param argv: list of command-line arguments, defaults to ``sys.argv[1:]``
    '''
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args(argv)

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    features = read_first_column(feature_fp)
    print(features)

    # extract elements to keep
    elements = read_first_column(element_fp)
    print(elements)

    # write mapping for features to keep while keeping only elements
    reduce_mapping(in_mapping_fp, out_mapping_fp, features, elements)


if __name__ == '__main__':
    main()
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.1.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.1.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.2.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.2.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.3.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.3.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.4.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.4.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.json --- a/test-data/test-db/metaphlan-db/demo-db-v30.json Wed May 19 17:02:37 2021 +0000 +++ b/test-data/test-db/metaphlan-db/demo-db-v30.json Tue Feb 07 19:46:09 2023 +0000 |
[ |
b'@@ -1,1 +1,1 @@\n-{"markers": {"189722__E0NQU7__HMPREF0658_0548": {"clade": "s__Prevotella_marshii", "ext": ["GCA_002161435", "GCA_000598585", "GCA_000699765", "GCA_900445525", "GCA_000178195", "GCA_000273725", "GCA_002811025", "GCA_000598905", "GCA_000598245", "GCA_900114865", "GCA_000699705", "GCA_000157015", "GCA_000155815", "GCA_000601115", "GCA_002811085", "GCA_000699785", "GCA_002894165", "GCA_000185605", "GCA_000273035", "GCA_001405475", "GCA_001548195", "GCA_001693695", "GCA_000273095", "GCA_001274835", "GCA_000759305", "GCA_000307435", "GCA_002160605", "GCA_000598305", "GCA_003439865", "GCA_001405955", "GCA_000598185", "GCA_000273055", "GCA_002884635", "GCA_002529225", "GCA_002811035", "GCA_000273155", "GCA_000759045", "GCA_000273315", "GCA_001405935", "GCA_001682215", "GCA_900167355", "GCA_000599065", "GCA_000577955", "GCA_000177075", "GCA_001406015", "GCA_002161135", "GCA_000598745", "GCA_000598885", "GCA_001314995", "GCA_000598425", "GCA_002222615", "GCA_000598785", "GCA_003438465", "GCA_000403175", "GCA_000154205", "GCA_900102645", "GCA_001398115", "GCA_900129535", "GCA_900454945", "GCA_003515045", "GCA_000599245", "GCA_000210495", "GCA_000156075", "GCA_003439285", "GCA_002959625", "GCA_000710365", "GCA_003437875", "GCA_000759245", "GCA_002797185", "GCA_000144405", "GCA_001405055", "GCA_002959715", "GCA_001574405", "GCA_000599365", "GCA_000177355", "GCA_001412315", "GCA_000759315", "GCA_000068585", "GCA_000699725", "GCA_001406095", "GCA_001552775", "GCA_001398395", "GCA_900095495", "GCA_001578575", "GCA_003437415", "GCA_000177055", "GCA_000598325", "GCA_000599305", "GCA_001552765", "GCA_001398375", "GCA_003539055", "GCA_003438835", "GCA_000025985", "GCA_003526655", "GCA_000955645", "GCA_900454835", "GCA_000273275", "GCA_001406715", "GCA_001217505", "GCA_002763975", "GCA_000699865", "GCA_000598665", "GCA_003466465", "GCA_000218325", "GCA_000598805", "GCA_002762425", "GCA_000599225", "GCA_001953935", "GCA_000599345", "GCA_002871515", "GCA_002763745", 
"GCA_001546595", "GCA_000699845", "GCA_001406315", "GCA_000158335", "GCA_003438705", "GCA_002204405", "GCA_002763715", "GCA_000297735", "GCA_002529435", "GCA_002753835", "GCA_000273135", "GCA_002763575", "GCA_000178295", "GCA_003439685", "GCA_000599205", "GCA_000273215", "GCA_001406635", "GCA_000218345", "GCA_002810995", "GCA_003438235", "GCA_000382445", "GCA_001405735", "GCA_000599285", "GCA_003386475", "GCA_001553225", "GCA_000766005", "GCA_001405155", "GCA_003438895", "GCA_003437605", "GCA_003437205", "GCA_003438205", "GCA_000599105", "GCA_002160595", "GCA_003436935", "GCA_003436175", "GCA_000598545", "GCA_001405515", "GCA_000261025", "GCA_003363235", "GCA_000307455", "GCA_000598285", "GCA_003439415", "GCA_003503335", "GCA_000169015", "GCA_000598165", "GCA_003436085", "GCA_900107315", "GCA_000601055", "GCA_000273785", "GCA_003437005", "GCA_000210835", "GCA_000598825", "GCA_002959635", "GCA_000738045", "GCA_000297755", "GCA_002161115", "GCA_000759165", "GCA_000273115", "GCA_000403235", "GCA_003439505", "GCA_000185845", "GCA_003438765", "GCA_000193395", "GCA_000210075", "GCA_002797155", "GCA_002161715", "GCA_001699865", "GCA_000273295", "GCA_000026905", "GCA_000738065", "GCA_003265025", "GCA_003436855", "GCA_000273075", "GCA_002206325", "GCA_002763535", "GCA_002794335", "GCA_003437545", "GCA_000759185", "GCA_001546565", "GCA_000012825", "GCA_001406135", "GCA_000598925", "GCA_000699665", "GCA_900445515", "GCA_000068525", "GCA_000598445", "GCA_000577295", "GCA_000699885", "GCA_003201715", "GCA_000759265", "GCA_900107825", "GCA_900107475", "GCA_003466305", "GCA_900100465", "GCA_001404375", "GCA_000598565", "GCA_001405775", "GCA_000191765", "GCA_003439225", "GCA_000177315", "GCA_001404395", "GCA_000598505", "GCA_000699905", "GCA_001915605", "GCA_001580095", "GCA_000163035", "GCA_000154125", "GCA_001405595", "GCA_000178275", "GCA_003438645", "GCA_001953955", "GCA_000598985", "GCA_000012845", "GCA_000762405", "GCA_900109635", "GCA_003436285", "GCA_000598945", 
"GCA_000724815"'..b'ceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577295": ["2|976|200643|171549|815|816|371601|", 6484037], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_002161115": ["2|976|200643|171549|815|816|371601|", 5692802], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000178215": ["2|976|200643|171549|815|816|371601|", 6059812], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577955": ["2|976|200643|171549|815|816|371601|", 6228594], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000273315": ["2|976|200643|171549|815|816|371601|", 6067695], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900107825": ["2|976|200643|171549|815|816|371601|", 6131743], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000210075": ["2|976|200643|171549|815|816|371601|", 5976145], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900114865": ["2|976|200643|171549|815|816|371601|", 5867942], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_salanitronis|t__GCA_000190575": ["2|976|200643|171549|815|816|376805|", 4308663], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_coprophilus|t__GCA_000157915": ["2|976|200643|171549|815|816|387090|", 4041504], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_D2|t__GCA_000159075": ["2|976|200643|171549|815|816|556259|", 6920457], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_001915515": ["2|976|200643|171549|815|816|626931|", 6012549], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_000315485": ["2|976|200643|171549|815|816|626931|", 7087734], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_003438445": ["2|976|200643|171549|815|816|626931|", 6457077], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sartorii|t__GCA_000403195": ["2|976|200643|171549|815|816|671267|", 5464209], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_reticulotermitis|t__GCA_000517545": ["2|976|200643|171549|815|816|1133319|", 5365278], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_002221665": ["2|976|200643|171549|815|816|1796613|", 4800416], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_001688725": ["2|976|200643|171549|815|816|1796613|", 4839927], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_43_108|t__GCA_001915545": ["2|976|200643|171549|815|816|1896974|", 5012994], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_OM05_12|t__GCA_003438995": ["2|976|200643|171549|815|816|2292283|", 4475735], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae|g__Rikenella|s__Rikenella_microfusus|t__GCA_900455755": ["2|976|200643|171549|171550|28138|28139|", 2945869]}, "merged_taxon": {}}\n\\ No newline at end of file\n' |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.pkl |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.pkl has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2 |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2 has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l |
b |
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json Tue Feb 07 19:46:09 2023 +0000 |
[ |
b'@@ -0,0 +1,1 @@\n+{"markers": {"189722__E0NQU7__HMPREF0658_0548": {"clade": "s__Prevotella_marshii", "ext": ["GCA_002161435", "GCA_000598585", "GCA_000699765", "GCA_900445525", "GCA_000178195", "GCA_000273725", "GCA_002811025", "GCA_000598905", "GCA_000598245", "GCA_900114865", "GCA_000699705", "GCA_000157015", "GCA_000155815", "GCA_000601115", "GCA_002811085", "GCA_000699785", "GCA_002894165", "GCA_000185605", "GCA_000273035", "GCA_001405475", "GCA_001548195", "GCA_001693695", "GCA_000273095", "GCA_001274835", "GCA_000759305", "GCA_000307435", "GCA_002160605", "GCA_000598305", "GCA_003439865", "GCA_001405955", "GCA_000598185", "GCA_000273055", "GCA_002884635", "GCA_002529225", "GCA_002811035", "GCA_000273155", "GCA_000759045", "GCA_000273315", "GCA_001405935", "GCA_001682215", "GCA_900167355", "GCA_000599065", "GCA_000577955", "GCA_000177075", "GCA_001406015", "GCA_002161135", "GCA_000598745", "GCA_000598885", "GCA_001314995", "GCA_000598425", "GCA_002222615", "GCA_000598785", "GCA_003438465", "GCA_000403175", "GCA_000154205", "GCA_900102645", "GCA_001398115", "GCA_900129535", "GCA_900454945", "GCA_003515045", "GCA_000599245", "GCA_000210495", "GCA_000156075", "GCA_003439285", "GCA_002959625", "GCA_000710365", "GCA_003437875", "GCA_000759245", "GCA_002797185", "GCA_000144405", "GCA_001405055", "GCA_002959715", "GCA_001574405", "GCA_000599365", "GCA_000177355", "GCA_001412315", "GCA_000759315", "GCA_000068585", "GCA_000699725", "GCA_001406095", "GCA_001552775", "GCA_001398395", "GCA_900095495", "GCA_001578575", "GCA_003437415", "GCA_000177055", "GCA_000598325", "GCA_000599305", "GCA_001552765", "GCA_001398375", "GCA_003539055", "GCA_003438835", "GCA_000025985", "GCA_003526655", "GCA_000955645", "GCA_900454835", "GCA_000273275", "GCA_001406715", "GCA_001217505", "GCA_002763975", "GCA_000699865", "GCA_000598665", "GCA_003466465", "GCA_000218325", "GCA_000598805", "GCA_002762425", "GCA_000599225", "GCA_001953935", "GCA_000599345", "GCA_002871515", "GCA_002763745", 
"GCA_001546595", "GCA_000699845", "GCA_001406315", "GCA_000158335", "GCA_003438705", "GCA_002204405", "GCA_002763715", "GCA_000297735", "GCA_002529435", "GCA_002753835", "GCA_000273135", "GCA_002763575", "GCA_000178295", "GCA_003439685", "GCA_000599205", "GCA_000273215", "GCA_001406635", "GCA_000218345", "GCA_002810995", "GCA_003438235", "GCA_000382445", "GCA_001405735", "GCA_000599285", "GCA_003386475", "GCA_001553225", "GCA_000766005", "GCA_001405155", "GCA_003438895", "GCA_003437605", "GCA_003437205", "GCA_003438205", "GCA_000599105", "GCA_002160595", "GCA_003436935", "GCA_003436175", "GCA_000598545", "GCA_001405515", "GCA_000261025", "GCA_003363235", "GCA_000307455", "GCA_000598285", "GCA_003439415", "GCA_003503335", "GCA_000169015", "GCA_000598165", "GCA_003436085", "GCA_900107315", "GCA_000601055", "GCA_000273785", "GCA_003437005", "GCA_000210835", "GCA_000598825", "GCA_002959635", "GCA_000738045", "GCA_000297755", "GCA_002161115", "GCA_000759165", "GCA_000273115", "GCA_000403235", "GCA_003439505", "GCA_000185845", "GCA_003438765", "GCA_000193395", "GCA_000210075", "GCA_002797155", "GCA_002161715", "GCA_001699865", "GCA_000273295", "GCA_000026905", "GCA_000738065", "GCA_003265025", "GCA_003436855", "GCA_000273075", "GCA_002206325", "GCA_002763535", "GCA_002794335", "GCA_003437545", "GCA_000759185", "GCA_001546565", "GCA_000012825", "GCA_001406135", "GCA_000598925", "GCA_000699665", "GCA_900445515", "GCA_000068525", "GCA_000598445", "GCA_000577295", "GCA_000699885", "GCA_003201715", "GCA_000759265", "GCA_900107825", "GCA_900107475", "GCA_003466305", "GCA_900100465", "GCA_001404375", "GCA_000598565", "GCA_001405775", "GCA_000191765", "GCA_003439225", "GCA_000177315", "GCA_001404395", "GCA_000598505", "GCA_000699905", "GCA_001915605", "GCA_001580095", "GCA_000163035", "GCA_000154125", "GCA_001405595", "GCA_000178275", "GCA_003438645", "GCA_001953955", "GCA_000598985", "GCA_000012845", "GCA_000762405", "GCA_900109635", "GCA_003436285", "GCA_000598945", 
"GCA_000724815"'..b'oidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577295": ["2|976|200643|171549|815|816|371601", 6484037], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_002161115": ["2|976|200643|171549|815|816|371601", 5692802], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000178215": ["2|976|200643|171549|815|816|371601", 6059812], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577955": ["2|976|200643|171549|815|816|371601", 6228594], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000273315": ["2|976|200643|171549|815|816|371601", 6067695], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900107825": ["2|976|200643|171549|815|816|371601", 6131743], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000210075": ["2|976|200643|171549|815|816|371601", 5976145], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900114865": ["2|976|200643|171549|815|816|371601", 5867942], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_salanitronis|t__GCA_000190575": ["2|976|200643|171549|815|816|376805", 4308663], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_coprophilus|t__GCA_000157915": ["2|976|200643|171549|815|816|387090", 4041504], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_D2|t__GCA_000159075": ["2|976|200643|171549|815|816|556259", 6920457], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_001915515": ["2|976|200643|171549|815|816|626931", 6012549], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_000315485": ["2|976|200643|171549|815|816|626931", 7087734], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_003438445": ["2|976|200643|171549|815|816|626931", 6457077], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sartorii|t__GCA_000403195": ["2|976|200643|171549|815|816|671267", 5464209], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_reticulotermitis|t__GCA_000517545": ["2|976|200643|171549|815|816|1133319", 5365278], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_002221665": ["2|976|200643|171549|815|816|1796613", 4800416], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_001688725": ["2|976|200643|171549|815|816|1796613", 4839927], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_43_108|t__GCA_001915545": ["2|976|200643|171549|815|816|1896974", 5012994], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_OM05_12|t__GCA_003438995": ["2|976|200643|171549|815|816|2292283", 4475735], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae|g__Rikenella|s__Rikenella_microfusus|t__GCA_900455755": ["2|976|200643|171549|171550|28138|28139", 2945869]}, "merged_taxon": {}}\n\\ No newline at end of file\n' |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl |
b |
Binary file test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz |
b |
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz |
b |
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz |
b |
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz |
b |
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz has changed |
b |
diff -r 4137b492233e -r d7e9a3c41657 transform_json_to_pkl.py --- a/transform_json_to_pkl.py Wed May 19 17:02:37 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,30 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import bz2 -import cPickle as pickle -import json - - -def transform_json_to_pkl(args): - with open(args.json_input, 'r') as json_file: - json_str = json_file.read() - metadata = json.loads(json_str) - - for marker in metadata["markers"]: - a_set = set(metadata["markers"][marker]["ext"]) - metadata["markers"][marker]["ext"] = a_set - - pkl_output = bz2.BZ2File(args.pkl_output, 'w') - pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL) - pkl_output.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--json_input', required=True) - parser.add_argument('--pkl_output', required=True) - args = parser.parse_args() - - transform_json_to_pkl(args) |