Repository 'humann_reduce_table'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/humann_reduce_table

Changeset 2:d122106c317f (2023-02-07)
Previous changeset 1:324c12894d59 (2021-05-19) Next changeset 3:7ebfd8d889b9 (2023-03-29)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
modified:
customizemetadata.py
macros.xml
test-data/barplot2.pdf
test-data/metaphlan_database.loc
test-data/test-db/metaphlan-db/demo-db-v30.json
test-data/test-db/metaphlan-db/demo-db-v30.pkl
added:
test-data/test-db/metaphlan-db/customizemapping.py
test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l
test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l
test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l
test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l
test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l
test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.1.bt2
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.2.bt2
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.3.bt2
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.4.bt2
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.rev.1.bt2
test-data/test-db/metaphlan-db/old-structure/demo-db-v30.rev.2.bt2
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz
removed:
customizemapping.py
test-data/test-db/metaphlan-db/demo-db-v30.1.bt2
test-data/test-db/metaphlan-db/demo-db-v30.2.bt2
test-data/test-db/metaphlan-db/demo-db-v30.3.bt2
test-data/test-db/metaphlan-db/demo-db-v30.4.bt2
test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2
test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz
transform_json_to_pkl.py
b
diff -r 324c12894d59 -r d122106c317f customizemapping.py
--- a/customizemapping.py Wed May 19 16:58:17 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import argparse
-from pathlib import Path
-
-
-if __name__ == '__main__':
-    # Read command line
-    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
-    parser.add_argument('--in_mapping', help="Path to mapping file to reduce")
-    parser.add_argument('--features', help="Path to tabular file with features to keep in first column")
-    parser.add_argument('--elements', help="Path to tabular file with elements to keep in other columns")
-    parser.add_argument('--out_mapping', help="Path to reduced mapping file")
-    args = parser.parse_args()
-
-    in_mapping_fp = Path(args.in_mapping)
-    feature_fp = Path(args.features)
-    element_fp = Path(args.elements)
-    out_mapping_fp = Path(args.out_mapping)
-
-    # extract features to keep
-    features = set()
-    with open(feature_fp, 'r') as feature_f:
-        for line in feature_f.readlines():
-            features.add(line.split("\t")[0])
-    print(features)
-
-    # extract elements to keep
-    elements = set()
-    with open(element_fp, 'r') as element_f:
-        for line in element_f.readlines():
-            elements.add(line.split("\t")[0])
-    print(elements)
-
-    # write mapping for features to keep while keeping only elements
-    with open(in_mapping_fp, 'r') as in_mapping_f:
-        with open(out_mapping_fp, 'w') as out_mapping_f:
-            for line in in_mapping_f.readlines():
-                l_split = line.split("\t")
-                feat = l_split[0]
-                if feat in features:
-                    to_write = [feat]
-                    for e in l_split[1:]:
-                        if e in elements:
-                            to_write.append(e)
-                    out_mapping_f.write("%s\n" % '\t'.join(to_write))
b
diff -r 324c12894d59 -r d122106c317f customizemetadata.py
--- a/customizemetadata.py Wed May 19 16:58:17 2021 +0000
+++ b/customizemetadata.py Tue Feb 07 19:49:34 2023 +0000
[
@@ -6,8 +6,12 @@
 import json
 import pickle
 import re
+import sys
+from importlib.metadata import version
 from pathlib import Path
 
+from packaging.version import Version
+
 
 def load_from_json(json_fp):
     '''
@@ -56,6 +60,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons tuple keys to string
     for k in in_metadata['merged_taxon']:
         n = ' , '.join(k)
@@ -65,6 +70,38 @@
     dump_to_json(out_metadata, json_fp)
 
 
+def validate_map_version(infile, file_type):
+    '''
+    Check conformity of a user-provided Pickle/JSON mapping file to the installed Metaphlan version (v3 or >= v4.0).
+
+    :param infile: Path to input Pickle/JSON file
+    :param file_type: String defining file type, pkl or JSON. Case-insensitive
+    '''
+    file_type = file_type.lower()
+    if file_type == 'pkl' or file_type == 'pickle':
+        # load metadata from Pickle file
+        with bz2.BZ2File(infile, 'r') as pkl_f:
+            in_metadata = pickle.load(pkl_f)
+    elif file_type == 'json':
+        in_metadata = load_from_json(infile)
+    else:
+        raise ValueError("Unsupported file type to validate.")
+
+    # Get metaphlan version in $PATH
+    metaphlan_version = Version(version('metaphlan'))
+
+    # Ensure that each taxid string has the number of "|" separators expected by the installed version (7 for v4, 6 for v3).
+    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
+    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
+    for k in in_metadata['taxonomy']:
+        if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')):
+            # raise ValueError("Missing/Extra values in GCA list")
+            print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version))
+            sys.exit(42)
+
+    print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version))
+
+
 def transform_json_to_pkl(json_fp, pkl_fp):
     '''
     Read JSON file and drop it to a Pickle file
@@ -80,6 +117,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons keys to tuple
     for k in in_metadata['merged_taxon']:
         n = ' , '.split(k)
@@ -448,8 +486,10 @@
     args = parser.parse_args()
 
     if args.function == 'transform_pkl_to_json':
+        validate_map_version(Path(args.pkl), 'pkl')
         transform_pkl_to_json(Path(args.pkl), Path(args.json))
     elif args.function == 'transform_json_to_pkl':
+        validate_map_version(Path(args.json), 'json')
         transform_json_to_pkl(Path(args.json), Path(args.pkl))
     elif args.function == 'add_marker':
         add_marker(
b
diff -r 324c12894d59 -r d122106c317f macros.xml
--- a/macros.xml Wed May 19 16:58:17 2021 +0000
+++ b/macros.xml Tue Feb 07 19:49:34 2023 +0000
b
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.0.0</token>
-    <token name="@VERSION_SUFFIX@">1</token>
-    <token name="@PROFILE@">20.01</token>
+    <token name="@TOOL_VERSION@">3.6.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">22.01</token>
     <xml name="edam_ontology">
         <edam_topics>
             <edam_topic>topic_3174</edam_topic>
@@ -32,6 +32,7 @@
     </token>
     <xml name="citations">
         <citations>
+            <citation type="doi">10.7554/eLife.65088</citation>
             <citation type="doi">10.1371/journal.pcbi.1002358</citation>
         </citations>
     </xml>
b
diff -r 324c12894d59 -r d122106c317f test-data/barplot2.pdf
b
Binary file test-data/barplot2.pdf has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/metaphlan_database.loc
--- a/test-data/metaphlan_database.loc Wed May 19 16:58:17 2021 +0000
+++ b/test-data/metaphlan_database.loc Tue Feb 07 19:49:34 2023 +0000
b
@@ -3,4 +3,5 @@
 # - db-name
 # - build
 # - /path/to/data 
-metaphlan-demo-db-20210421 MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db
\ No newline at end of file
+metaphlan-demo-db-20210421 MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db
+metaphlan-db-old-structure MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db/old-structure
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/customizemapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/metaphlan-db/customizemapping.py Tue Feb 07 19:49:34 2023 +0000
[
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Script to extract a custom mapping file from an input mapping file.
+# Mostly used for generating reduced-size demo data.
+
+
+import argparse
+from pathlib import Path
+
+
+if __name__ == '__main__':
+    # Read command line
+    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
+    parser.add_argument('--in_mapping', help="Path to mapping file to reduce")
+    parser.add_argument('--features', help="Path to tabular file with features to keep in first column")
+    parser.add_argument('--elements', help="Path to tabular file with elements to keep in other columns")
+    parser.add_argument('--out_mapping', help="Path to reduced mapping file")
+    args = parser.parse_args()
+
+    in_mapping_fp = Path(args.in_mapping)
+    feature_fp = Path(args.features)
+    element_fp = Path(args.elements)
+    out_mapping_fp = Path(args.out_mapping)
+
+    # extract features to keep
+    features = set()
+    with open(feature_fp, 'r') as feature_f:
+        for line in feature_f.readlines():
+            features.add(line.split("\t")[0])
+    print(features)
+
+    # extract elements to keep
+    elements = set()
+    with open(element_fp, 'r') as element_f:
+        for line in element_f.readlines():
+            elements.add(line.split("\t")[0])
+    print(elements)
+
+    # write mapping for features to keep while keeping only elements
+    with open(in_mapping_fp, 'r') as in_mapping_f:
+        with open(out_mapping_fp, 'w') as out_mapping_f:
+            for line in in_mapping_f.readlines():
+                l_split = line.split("\t")
+                feat = l_split[0]
+                if feat in features:
+                    to_write = [feat]
+                    for e in l_split[1:]:
+                        if e in elements:
+                            to_write.append(e)
+                    out_mapping_f.write("%s\n" % '\t'.join(to_write))
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.1.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.1.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.1.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.2.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.2.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.2.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.3.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.3.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.3.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.4.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.4.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.4.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.json
--- a/test-data/test-db/metaphlan-db/demo-db-v30.json Wed May 19 16:58:17 2021 +0000
+++ b/test-data/test-db/metaphlan-db/demo-db-v30.json Tue Feb 07 19:49:34 2023 +0000
[
b'@@ -1,1 +1,1 @@\n-{"markers": {"189722__E0NQU7__HMPREF0658_0548": {"clade": "s__Prevotella_marshii", "ext": ["GCA_002161435", "GCA_000598585", "GCA_000699765", "GCA_900445525", "GCA_000178195", "GCA_000273725", "GCA_002811025", "GCA_000598905", "GCA_000598245", "GCA_900114865", "GCA_000699705", "GCA_000157015", "GCA_000155815", "GCA_000601115", "GCA_002811085", "GCA_000699785", "GCA_002894165", "GCA_000185605", "GCA_000273035", "GCA_001405475", "GCA_001548195", "GCA_001693695", "GCA_000273095", "GCA_001274835", "GCA_000759305", "GCA_000307435", "GCA_002160605", "GCA_000598305", "GCA_003439865", "GCA_001405955", "GCA_000598185", "GCA_000273055", "GCA_002884635", "GCA_002529225", "GCA_002811035", "GCA_000273155", "GCA_000759045", "GCA_000273315", "GCA_001405935", "GCA_001682215", "GCA_900167355", "GCA_000599065", "GCA_000577955", "GCA_000177075", "GCA_001406015", "GCA_002161135", "GCA_000598745", "GCA_000598885", "GCA_001314995", "GCA_000598425", "GCA_002222615", "GCA_000598785", "GCA_003438465", "GCA_000403175", "GCA_000154205", "GCA_900102645", "GCA_001398115", "GCA_900129535", "GCA_900454945", "GCA_003515045", "GCA_000599245", "GCA_000210495", "GCA_000156075", "GCA_003439285", "GCA_002959625", "GCA_000710365", "GCA_003437875", "GCA_000759245", "GCA_002797185", "GCA_000144405", "GCA_001405055", "GCA_002959715", "GCA_001574405", "GCA_000599365", "GCA_000177355", "GCA_001412315", "GCA_000759315", "GCA_000068585", "GCA_000699725", "GCA_001406095", "GCA_001552775", "GCA_001398395", "GCA_900095495", "GCA_001578575", "GCA_003437415", "GCA_000177055", "GCA_000598325", "GCA_000599305", "GCA_001552765", "GCA_001398375", "GCA_003539055", "GCA_003438835", "GCA_000025985", "GCA_003526655", "GCA_000955645", "GCA_900454835", "GCA_000273275", "GCA_001406715", "GCA_001217505", "GCA_002763975", "GCA_000699865", "GCA_000598665", "GCA_003466465", "GCA_000218325", "GCA_000598805", "GCA_002762425", "GCA_000599225", "GCA_001953935", "GCA_000599345", "GCA_002871515", "GCA_002763745", 
"GCA_001546595", "GCA_000699845", "GCA_001406315", "GCA_000158335", "GCA_003438705", "GCA_002204405", "GCA_002763715", "GCA_000297735", "GCA_002529435", "GCA_002753835", "GCA_000273135", "GCA_002763575", "GCA_000178295", "GCA_003439685", "GCA_000599205", "GCA_000273215", "GCA_001406635", "GCA_000218345", "GCA_002810995", "GCA_003438235", "GCA_000382445", "GCA_001405735", "GCA_000599285", "GCA_003386475", "GCA_001553225", "GCA_000766005", "GCA_001405155", "GCA_003438895", "GCA_003437605", "GCA_003437205", "GCA_003438205", "GCA_000599105", "GCA_002160595", "GCA_003436935", "GCA_003436175", "GCA_000598545", "GCA_001405515", "GCA_000261025", "GCA_003363235", "GCA_000307455", "GCA_000598285", "GCA_003439415", "GCA_003503335", "GCA_000169015", "GCA_000598165", "GCA_003436085", "GCA_900107315", "GCA_000601055", "GCA_000273785", "GCA_003437005", "GCA_000210835", "GCA_000598825", "GCA_002959635", "GCA_000738045", "GCA_000297755", "GCA_002161115", "GCA_000759165", "GCA_000273115", "GCA_000403235", "GCA_003439505", "GCA_000185845", "GCA_003438765", "GCA_000193395", "GCA_000210075", "GCA_002797155", "GCA_002161715", "GCA_001699865", "GCA_000273295", "GCA_000026905", "GCA_000738065", "GCA_003265025", "GCA_003436855", "GCA_000273075", "GCA_002206325", "GCA_002763535", "GCA_002794335", "GCA_003437545", "GCA_000759185", "GCA_001546565", "GCA_000012825", "GCA_001406135", "GCA_000598925", "GCA_000699665", "GCA_900445515", "GCA_000068525", "GCA_000598445", "GCA_000577295", "GCA_000699885", "GCA_003201715", "GCA_000759265", "GCA_900107825", "GCA_900107475", "GCA_003466305", "GCA_900100465", "GCA_001404375", "GCA_000598565", "GCA_001405775", "GCA_000191765", "GCA_003439225", "GCA_000177315", "GCA_001404395", "GCA_000598505", "GCA_000699905", "GCA_001915605", "GCA_001580095", "GCA_000163035", "GCA_000154125", "GCA_001405595", "GCA_000178275", "GCA_003438645", "GCA_001953955", "GCA_000598985", "GCA_000012845", "GCA_000762405", "GCA_900109635", "GCA_003436285", "GCA_000598945", 
"GCA_000724815"'..b'ceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577295": ["2|976|200643|171549|815|816|371601|", 6484037], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_002161115": ["2|976|200643|171549|815|816|371601|", 5692802], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000178215": ["2|976|200643|171549|815|816|371601|", 6059812], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577955": ["2|976|200643|171549|815|816|371601|", 6228594], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000273315": ["2|976|200643|171549|815|816|371601|", 6067695], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900107825": ["2|976|200643|171549|815|816|371601|", 6131743], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000210075": ["2|976|200643|171549|815|816|371601|", 5976145], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900114865": ["2|976|200643|171549|815|816|371601|", 5867942], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_salanitronis|t__GCA_000190575": ["2|976|200643|171549|815|816|376805|", 4308663], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_coprophilus|t__GCA_000157915": ["2|976|200643|171549|815|816|387090|", 4041504], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_D2|t__GCA_000159075": ["2|976|200643|171549|815|816|556259|", 6920457], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_001915515": ["2|976|200643|171549|815|816|626931|", 6012549], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_000315485": ["2|976|200643|171549|815|816|626931|", 7087734], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_003438445": ["2|976|200643|171549|815|816|626931|", 6457077], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sartorii|t__GCA_000403195": ["2|976|200643|171549|815|816|671267|", 5464209], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_reticulotermitis|t__GCA_000517545": ["2|976|200643|171549|815|816|1133319|", 5365278], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_002221665": ["2|976|200643|171549|815|816|1796613|", 4800416], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_001688725": ["2|976|200643|171549|815|816|1796613|", 4839927], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_43_108|t__GCA_001915545": ["2|976|200643|171549|815|816|1896974|", 5012994], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_OM05_12|t__GCA_003438995": ["2|976|200643|171549|815|816|2292283|", 4475735], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae|g__Rikenella|s__Rikenella_microfusus|t__GCA_900455755": ["2|976|200643|171549|171550|28138|28139|", 2945869]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.pkl
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.pkl has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2 has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2l has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/metaphlan-db/old-structure/demo-db-v30.json Tue Feb 07 19:49:34 2023 +0000
[
b'@@ -0,0 +1,1 @@\n+{"markers": {"189722__E0NQU7__HMPREF0658_0548": {"clade": "s__Prevotella_marshii", "ext": ["GCA_002161435", "GCA_000598585", "GCA_000699765", "GCA_900445525", "GCA_000178195", "GCA_000273725", "GCA_002811025", "GCA_000598905", "GCA_000598245", "GCA_900114865", "GCA_000699705", "GCA_000157015", "GCA_000155815", "GCA_000601115", "GCA_002811085", "GCA_000699785", "GCA_002894165", "GCA_000185605", "GCA_000273035", "GCA_001405475", "GCA_001548195", "GCA_001693695", "GCA_000273095", "GCA_001274835", "GCA_000759305", "GCA_000307435", "GCA_002160605", "GCA_000598305", "GCA_003439865", "GCA_001405955", "GCA_000598185", "GCA_000273055", "GCA_002884635", "GCA_002529225", "GCA_002811035", "GCA_000273155", "GCA_000759045", "GCA_000273315", "GCA_001405935", "GCA_001682215", "GCA_900167355", "GCA_000599065", "GCA_000577955", "GCA_000177075", "GCA_001406015", "GCA_002161135", "GCA_000598745", "GCA_000598885", "GCA_001314995", "GCA_000598425", "GCA_002222615", "GCA_000598785", "GCA_003438465", "GCA_000403175", "GCA_000154205", "GCA_900102645", "GCA_001398115", "GCA_900129535", "GCA_900454945", "GCA_003515045", "GCA_000599245", "GCA_000210495", "GCA_000156075", "GCA_003439285", "GCA_002959625", "GCA_000710365", "GCA_003437875", "GCA_000759245", "GCA_002797185", "GCA_000144405", "GCA_001405055", "GCA_002959715", "GCA_001574405", "GCA_000599365", "GCA_000177355", "GCA_001412315", "GCA_000759315", "GCA_000068585", "GCA_000699725", "GCA_001406095", "GCA_001552775", "GCA_001398395", "GCA_900095495", "GCA_001578575", "GCA_003437415", "GCA_000177055", "GCA_000598325", "GCA_000599305", "GCA_001552765", "GCA_001398375", "GCA_003539055", "GCA_003438835", "GCA_000025985", "GCA_003526655", "GCA_000955645", "GCA_900454835", "GCA_000273275", "GCA_001406715", "GCA_001217505", "GCA_002763975", "GCA_000699865", "GCA_000598665", "GCA_003466465", "GCA_000218325", "GCA_000598805", "GCA_002762425", "GCA_000599225", "GCA_001953935", "GCA_000599345", "GCA_002871515", "GCA_002763745", 
"GCA_001546595", "GCA_000699845", "GCA_001406315", "GCA_000158335", "GCA_003438705", "GCA_002204405", "GCA_002763715", "GCA_000297735", "GCA_002529435", "GCA_002753835", "GCA_000273135", "GCA_002763575", "GCA_000178295", "GCA_003439685", "GCA_000599205", "GCA_000273215", "GCA_001406635", "GCA_000218345", "GCA_002810995", "GCA_003438235", "GCA_000382445", "GCA_001405735", "GCA_000599285", "GCA_003386475", "GCA_001553225", "GCA_000766005", "GCA_001405155", "GCA_003438895", "GCA_003437605", "GCA_003437205", "GCA_003438205", "GCA_000599105", "GCA_002160595", "GCA_003436935", "GCA_003436175", "GCA_000598545", "GCA_001405515", "GCA_000261025", "GCA_003363235", "GCA_000307455", "GCA_000598285", "GCA_003439415", "GCA_003503335", "GCA_000169015", "GCA_000598165", "GCA_003436085", "GCA_900107315", "GCA_000601055", "GCA_000273785", "GCA_003437005", "GCA_000210835", "GCA_000598825", "GCA_002959635", "GCA_000738045", "GCA_000297755", "GCA_002161115", "GCA_000759165", "GCA_000273115", "GCA_000403235", "GCA_003439505", "GCA_000185845", "GCA_003438765", "GCA_000193395", "GCA_000210075", "GCA_002797155", "GCA_002161715", "GCA_001699865", "GCA_000273295", "GCA_000026905", "GCA_000738065", "GCA_003265025", "GCA_003436855", "GCA_000273075", "GCA_002206325", "GCA_002763535", "GCA_002794335", "GCA_003437545", "GCA_000759185", "GCA_001546565", "GCA_000012825", "GCA_001406135", "GCA_000598925", "GCA_000699665", "GCA_900445515", "GCA_000068525", "GCA_000598445", "GCA_000577295", "GCA_000699885", "GCA_003201715", "GCA_000759265", "GCA_900107825", "GCA_900107475", "GCA_003466305", "GCA_900100465", "GCA_001404375", "GCA_000598565", "GCA_001405775", "GCA_000191765", "GCA_003439225", "GCA_000177315", "GCA_001404395", "GCA_000598505", "GCA_000699905", "GCA_001915605", "GCA_001580095", "GCA_000163035", "GCA_000154125", "GCA_001405595", "GCA_000178275", "GCA_003438645", "GCA_001953955", "GCA_000598985", "GCA_000012845", "GCA_000762405", "GCA_900109635", "GCA_003436285", "GCA_000598945", 
"GCA_000724815"'..b'oidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577295": ["2|976|200643|171549|815|816|371601", 6484037], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_002161115": ["2|976|200643|171549|815|816|371601", 5692802], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000178215": ["2|976|200643|171549|815|816|371601", 6059812], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577955": ["2|976|200643|171549|815|816|371601", 6228594], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000273315": ["2|976|200643|171549|815|816|371601", 6067695], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900107825": ["2|976|200643|171549|815|816|371601", 6131743], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000210075": ["2|976|200643|171549|815|816|371601", 5976145], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900114865": ["2|976|200643|171549|815|816|371601", 5867942], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_salanitronis|t__GCA_000190575": ["2|976|200643|171549|815|816|376805", 4308663], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_coprophilus|t__GCA_000157915": ["2|976|200643|171549|815|816|387090", 4041504], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_D2|t__GCA_000159075": ["2|976|200643|171549|815|816|556259", 6920457], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_001915515": ["2|976|200643|171549|815|816|626931", 6012549], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_000315485": ["2|976|200643|171549|815|816|626931", 7087734], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_003438445": ["2|976|200643|171549|815|816|626931", 6457077], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sartorii|t__GCA_000403195": ["2|976|200643|171549|815|816|671267", 5464209], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_reticulotermitis|t__GCA_000517545": ["2|976|200643|171549|815|816|1133319", 5365278], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_002221665": ["2|976|200643|171549|815|816|1796613", 4800416], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_001688725": ["2|976|200643|171549|815|816|1796613", 4839927], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_43_108|t__GCA_001915545": ["2|976|200643|171549|815|816|1896974", 5012994], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_OM05_12|t__GCA_003438995": ["2|976|200643|171549|815|816|2292283", 4475735], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae|g__Rikenella|s__Rikenella_microfusus|t__GCA_900455755": ["2|976|200643|171549|171550|28138|28139", 2945869]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl
b
Binary file test-data/test-db/metaphlan-db/old-structure/demo-db-v30.pkl has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz has changed
b
diff -r 324c12894d59 -r d122106c317f test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz has changed
b
diff -r 324c12894d59 -r d122106c317f transform_json_to_pkl.py
--- a/transform_json_to_pkl.py Wed May 19 16:58:17 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import argparse
-import bz2
-import cPickle as pickle
-import json
-
-
-def transform_json_to_pkl(args):
-    with open(args.json_input, 'r') as json_file:
-        json_str = json_file.read()
-        metadata = json.loads(json_str)
-
-        for marker in metadata["markers"]:
-            a_set = set(metadata["markers"][marker]["ext"])
-            metadata["markers"][marker]["ext"] = a_set
-
-    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
-    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
-    pkl_output.close()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--json_input', required=True)
-    parser.add_argument('--pkl_output', required=True)
-    args = parser.parse_args()
-
-    transform_json_to_pkl(args)