Repository 'customize_metaphlan_database'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/customize_metaphlan_database

Changeset 5:892a0629e2ed (2022-12-03)
Previous changeset 4:150ebe4d0fd9 (2022-02-02) Next changeset 6:46fc97bb98f9 (2023-02-27)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f3a89af137b13715f9fb13383577aceb2c445ce6
modified:
customizemetadata.py
formatoutput.py
macros.xml
test-data/test-db.json
test-data/test-db/test-db.pkl
added:
test-data/test-db/test-db.1.bt2l
test-data/test-db/test-db.2.bt2l
test-data/test-db/test-db.3.bt2l
test-data/test-db/test-db.4.bt2l
test-data/test-db/test-db.rev.1.bt2l
test-data/test-db/test-db.rev.2.bt2l
removed:
test-data/test-db/test-db.1.bt2
test-data/test-db/test-db.2.bt2
test-data/test-db/test-db.3.bt2
test-data/test-db/test-db.4.bt2
test-data/test-db/test-db.rev.1.bt2
test-data/test-db/test-db.rev.2.bt2
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed customizemetadata.py
--- a/customizemetadata.py Wed Feb 02 22:02:41 2022 +0000
+++ b/customizemetadata.py Sat Dec 03 10:43:00 2022 +0000
[
@@ -56,6 +56,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons tuple keys to string
     for k in in_metadata['merged_taxon']:
         n = ' , '.join(k)
@@ -85,6 +86,13 @@
         n = ' , '.split(k)
         out_metadata[n] = in_metadata['merged_taxon'][k]
 
+    # Ensure that every taxid string has 8 '|'-separated levels by appending a trailing '|' to 7-level entries (for compatibility between MetaPhlAn v3 and v4)
+    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
+    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
+    for k in out_metadata['taxonomy']:
+        if out_metadata['taxonomy'][k][0].count('|') == 6:
+            out_metadata['taxonomy'][k] = (out_metadata['taxonomy'][k][0] + '|', out_metadata['taxonomy'][k][1])
+
     # dump metadata to Pickle file
     with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
         pickle.dump(out_metadata, pkl_f)
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed formatoutput.py
--- a/formatoutput.py Wed Feb 02 22:02:41 2022 +0000
+++ b/formatoutput.py Sat Dec 03 10:43:00 2022 +0000
[
@@ -57,9 +57,11 @@
                 # skip headers
                 if line.startswith("#"):
                     continue
-                # skip UNKNOWN lines in Predicted taxon relative abundances
-                if "UNKNOWN" in line:
+
+                # skip UNKNOWN (v3) or UNCLASSIFIED (v4) lines in predicted taxon relative abundances
+                if "UNKNOWN" in line or 'UNCLASSIFIED' in line:
                     continue
+
                 # split lines
                 split_line = line[:-1].split('\t')
                 taxo_n = split_line[0].split('|')
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed macros.xml
--- a/macros.xml Wed Feb 02 22:02:41 2022 +0000
+++ b/macros.xml Sat Dec 03 10:43:00 2022 +0000
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.0.14</token>
+    <token name="@TOOL_VERSION@">4.0.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="edam_ontology">
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db.json
--- a/test-data/test-db.json Wed Feb 02 22:02:41 2022 +0000
+++ b/test-data/test-db.json Sat Dec 03 10:43:00 2022 +0000
[
b'@@ -1,1 +1,1 @@\n-{"markers": {"504553__GeneID:10498608": {"ext": ["GCA_000144505", "GCA_000144125", "GCA_000488875", "GCA_000178055", "GCA_001660965", "GCA_000144445", "GCA_003384145", "GCA_002572705", "GCA_001660935", "GCA_001750525", "GCA_000144775", "GCA_003390995", "GCA_001750535", "GCA_000008345", "GCA_003384705", "GCA_003384485", "GCA_002572745", "GCA_000144465", "GCA_000145375", "GCA_002775655", "GCA_001660855", "GCA_001660945", "GCA_000144875", "GCA_003426625", "GCA_000240055", "GCA_003426225", "GCA_003384285", "GCA_000144815", "GCA_000145095", "GCA_900102845", "GCA_001469595", "GCA_000145195", "GCA_003384365", "GCA_000145155", "GCA_003384255", "GCA_003384315", "GCA_000144345", "GCA_000240015", "GCA_000302515", "GCA_002831715", "GCA_000144285", "GCA_003426665", "GCA_001469635", "GCA_000144185", "GCA_002572835", "GCA_001660115", "GCA_000144325", "GCA_000144735", "GCA_000194825", "GCA_000376705", "GCA_000147145", "GCA_002572575", "GCA_000144795", "GCA_003384385", "GCA_000144895", "GCA_003425775", "GCA_003384345", "GCA_000231215", "GCA_003384555", "GCA_000735055", "GCA_000145575", "GCA_000144005", "GCA_003384195", "GCA_001469555", "GCA_003384585", "GCA_003384395", "GCA_000145455", "GCA_000144485", "GCA_003384495", "GCA_000342585", "GCA_003384445", "GCA_000144365", "GCA_000730485", "GCA_002861085", "GCA_002572615", "GCA_000144145", "GCA_002556485", "GCA_001481615", "GCA_000144245", "GCA_000144545", "GCA_000221125", "GCA_001469655", "GCA_003426685", "GCA_000144045", "GCA_002572665", "GCA_002572695", "GCA_002572655", "GCA_001750555", "GCA_003426255", "GCA_000145335", "GCA_002572775"], "score": 37.0, "clade": "t__Propionibacterium_phage_PAS50", "len": 663, "taxon": "k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Caudovirales|f__Siphoviridae|g__Pa6virus|s__Propionibacterium_virus_PAS50|t__Propionibacterium_phage_PAS50"}, "386414__D1VYE0__HMPREF9019_1663": {"clade": "s__Prevotella_timonensis", "ext": [], "len": 1065, "score": 0, "taxon": 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_timonensis"}, "29394__H3NGH1__B5772_02200": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 972, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFY1__B8A41_07655": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 534, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NC06__B8A41_08715": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 1317, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDE2__B8A45_00965": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 699, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFL8__B8A45_06985": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 582, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDD1__B8A45_02325": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 480, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NE37__B8A45_05170": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 756, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NCU9__B8A45_08665": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 765, "score": 0, "taxon": 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "2'..b'terobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076675": ["2|1224|1236|91347|543|547|354276|", 4289814], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958695": ["2|1224|1236|91347|543|547|354276|", 4336076], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077255": ["2|1224|1236|91347|543|547|354276|", 4482974], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075885": ["2|1224|1236|91347|543|547|354276|", 4396160], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001487035": ["2|1224|1236|91347|543|547|354276|", 4279410], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077025": ["2|1224|1236|91347|543|547|354276|", 4303785], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000952555": ["2|1224|1236|91347|543|547|354276|", 4340464], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076755": ["2|1224|1236|91347|543|547|354276|", 4236759], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000783855": ["2|1224|1236|91347|543|547|354276|", 4188638], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022075": ["2|1224|1236|91347|543|547|354276|", 4412384], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022255": ["2|1224|1236|91347|543|547|354276|", 4385357], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075375": ["2|1224|1236|91347|543|547|354276|", 4164369], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075395": ["2|1224|1236|91347|543|547|354276|", 4013880], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_003261215": ["2|1224|1236|91347|543|547|354276|", 5016502], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075185": ["2|1224|1236|91347|543|547|354276|", 4392401], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_002192395": ["2|1224|1236|91347|543|547|354276|", 4556770], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958745": ["2|1224|1236|91347|543|547|354276|", 4382425], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001025055": ["2|1224|1236|91347|543|547|354276|", 4360879], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076455": ["2|1224|1236|91347|543|547|354276|", 4404432]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.1.bt2
b
Binary file test-data/test-db/test-db.1.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.1.bt2l
b
Binary file test-data/test-db/test-db.1.bt2l has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.2.bt2
b
Binary file test-data/test-db/test-db.2.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.2.bt2l
b
Binary file test-data/test-db/test-db.2.bt2l has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.3.bt2
b
Binary file test-data/test-db/test-db.3.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.3.bt2l
b
Binary file test-data/test-db/test-db.3.bt2l has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.4.bt2
b
Binary file test-data/test-db/test-db.4.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.4.bt2l
b
Binary file test-data/test-db/test-db.4.bt2l has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.pkl
b
Binary file test-data/test-db/test-db.pkl has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.rev.1.bt2
b
Binary file test-data/test-db/test-db.rev.1.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.rev.1.bt2l
b
Binary file test-data/test-db/test-db.rev.1.bt2l has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.rev.2.bt2
b
Binary file test-data/test-db/test-db.rev.2.bt2 has changed
b
diff -r 150ebe4d0fd9 -r 892a0629e2ed test-data/test-db/test-db.rev.2.bt2l
b
Binary file test-data/test-db/test-db.rev.2.bt2l has changed