Repository 'metaphlan'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/metaphlan

Changeset 5:6dee4abadccb (2022-12-03)
Previous changeset 4:a16313517e55 (2022-02-02) Next changeset 6:2131d7dca455 (2023-02-27)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f3a89af137b13715f9fb13383577aceb2c445ce6
modified:
customizemetadata.py
formatoutput.py
macros.xml
metaphlan.xml
test-data/test-db.json
test-data/test-db/test-db.pkl
added:
test-data/test-db/test-db.1.bt2l
test-data/test-db/test-db.2.bt2l
test-data/test-db/test-db.3.bt2l
test-data/test-db/test-db.4.bt2l
test-data/test-db/test-db.rev.1.bt2l
test-data/test-db/test-db.rev.2.bt2l
removed:
test-data/test-db/test-db.1.bt2
test-data/test-db/test-db.2.bt2
test-data/test-db/test-db.3.bt2
test-data/test-db/test-db.4.bt2
test-data/test-db/test-db.rev.1.bt2
test-data/test-db/test-db.rev.2.bt2
b
diff -r a16313517e55 -r 6dee4abadccb customizemetadata.py
--- a/customizemetadata.py Wed Feb 02 22:03:11 2022 +0000
+++ b/customizemetadata.py Sat Dec 03 10:43:21 2022 +0000
[
@@ -56,6 +56,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons tuple keys to string
     for k in in_metadata['merged_taxon']:
         n = ' , '.join(k)
@@ -85,6 +86,13 @@
         n = ' , '.split(k)
         out_metadata[n] = in_metadata['merged_taxon'][k]
 
+    # Ensure that there are 8 taxonomy levels (for compatibility between Metaphlan v3 and v4)
+    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
+    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
+    for k in out_metadata['taxonomy']:
+        if out_metadata['taxonomy'][k][0].count('|') == 6:
+            out_metadata['taxonomy'][k] = (out_metadata['taxonomy'][k][0] + '|', out_metadata['taxonomy'][k][1])
+
     # dump metadata to Pickle file
     with bz2.BZ2File(pkl_fp, 'w') as pkl_f:
         pickle.dump(out_metadata, pkl_f)
b
diff -r a16313517e55 -r 6dee4abadccb formatoutput.py
--- a/formatoutput.py Wed Feb 02 22:03:11 2022 +0000
+++ b/formatoutput.py Sat Dec 03 10:43:21 2022 +0000
[
@@ -57,9 +57,11 @@
                 # skip headers
                 if line.startswith("#"):
                     continue
-                # skip UNKNOWN lines in Predicted taxon relative abundances
-                if "UNKNOWN" in line:
+
+                # skip UNKNOWN (v3) or UNCLASSIFIED (v4) lines in predicted taxon relative abundances
+                if "UNKNOWN" in line or 'UNCLASSIFIED' in line:
                     continue
+
                 # split lines
                 split_line = line[:-1].split('\t')
                 taxo_n = split_line[0].split('|')
b
diff -r a16313517e55 -r 6dee4abadccb macros.xml
--- a/macros.xml Wed Feb 02 22:03:11 2022 +0000
+++ b/macros.xml Sat Dec 03 10:43:21 2022 +0000
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.0.14</token>
+    <token name="@TOOL_VERSION@">4.0.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="edam_ontology">
b
diff -r a16313517e55 -r 6dee4abadccb metaphlan.xml
--- a/metaphlan.xml Wed Feb 02 22:03:11 2022 +0000
+++ b/metaphlan.xml Sat Dec 03 10:43:21 2022 +0000
b
@@ -107,7 +107,7 @@
 #if $inputs.db.db_selector == "history"
 mkdir 'ref_db'
 &&
-bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db'
+bowtie2-build --large-index '$inputs.db.bowtie2db' 'ref_db/custom_db'
 &&
 python '$__tool_directory__/customizemetadata.py'
     transform_json_to_pkl
@@ -123,9 +123,15 @@
     --read_min_len $inputs.in.read_min_len
     --bt2_ps '$inputs.in.mapping.bt2_ps'
     --min_mapq_val $inputs.in.mapping.min_mapq_val
+    #if $ext == "sam"
+        --nreads \$(cat '$file_path' | grep -c -v '^@')
+    #end if
 #else
     '$inputs.in.in'
     --input_type '$inputs.in.selector'
+    #if $inputs.in.selector == "sam"
+        --nreads \$(cat '$inputs.in.in' | grep -c -v '^@')
+    #end if
 #end if
 #if $inputs.db.db_selector == "cached"
     --bowtie2db '$inputs.db.cached_db.fields.path'
@@ -334,7 +340,7 @@
             <param argument="--sample_id" type="text" value="Metaphlan_Analysis" label="Sample ID for this analysis"/>
             <param argument="--use_group_representative" type='boolean' checked="false" truevalue='--use_group_representative' falsevalue='' 
                 label="Use a species as representative for species groups?"/>
-            <param name="legacy_output" argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' 
+            <param argument="--legacy-output" type='boolean' checked="false" truevalue='--legacy-output' falsevalue='' 
                 label="Old MetaPhlAn2 two columns output?"/>
             <param argument="--CAMI_format_output" type='boolean' checked="false" truevalue='--CAMI_format_output' falsevalue='' 
                 label="Report the profiling using the CAMI output format?"/>
@@ -408,7 +414,7 @@
             </section>
             <output name="output_file" ftype="tabular">
                 <assert_contents>
-                    <has_text text="UNKNOWN"/>
+                    <has_text text="UNCLASSIFIED"/>
                 </assert_contents>
             </output>
             <output name="bowtie2out" ftype="tabular">
@@ -503,7 +509,8 @@
             <output name="krona_output_file" ftype="tabular">
                 <assert_contents>
                     <not_has_text text="k__Bacteria"/>
-                    <has_n_lines n="0"/>
+                    <has_n_lines n="1" delta="1"/>
+                    <has_size value="1" delta="1"/>
                 </assert_contents>
             </output>
         </test>
@@ -1051,7 +1058,7 @@
 ============
 
 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, 
-Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level. 
+Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) at species-level. 
 
 MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes 
 (~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
@@ -1077,8 +1084,8 @@
 aggregated marker information. Such capability comes with several slightly different flavours and 
 are a way to perform strain tracking and comparison across multiple samples.
 
-Usually, MetaPhlAn is first ran with the default parameter for the type of analysis to profile the 
-species present in the community, and then a strain-level profiling can be performed to zoom-in into 
+Usually, MetaPhlAn is first run with default parameters for the type of analysis to profile the 
+species present in the community, and then a strain-level profiling can be performed to zoom in on 
 specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 
 intermediate file saved during the execution of the default analysis type.
 
@@ -1087,16 +1094,16 @@
 
 Metaphlan takes as input either:
 
-- one or several sequence files in Fasta, FastQ (compressed or not)
+- one or several sequence files in Fasta or FastQ format (whether compressed or not)
 - a BowTie2 produced SAM file
 - an intermediary mapping file of the microbiota generated by a previous MetaPhlAn run
 
-It also need the reference database, which can be locally installed or customized using the dedicated tools.
+It also needs the reference database, which can be locally installed or customized using the dedicated tools.
 
 Outputs
 =======
 
-The main output file is a tab-separated file with the predicted taxon relative abundances.
+The main output is a tab-separated file with the predicted taxon relative abundances.
 
 It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
 
@@ -1106,7 +1113,7 @@
 
 To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
 
-.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage
+.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-4#Basic-Usage
 
     ]]></help>
     <expand macro="citations"/>
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db.json
--- a/test-data/test-db.json Wed Feb 02 22:03:11 2022 +0000
+++ b/test-data/test-db.json Sat Dec 03 10:43:21 2022 +0000
[
b'@@ -1,1 +1,1 @@\n-{"markers": {"504553__GeneID:10498608": {"ext": ["GCA_000144505", "GCA_000144125", "GCA_000488875", "GCA_000178055", "GCA_001660965", "GCA_000144445", "GCA_003384145", "GCA_002572705", "GCA_001660935", "GCA_001750525", "GCA_000144775", "GCA_003390995", "GCA_001750535", "GCA_000008345", "GCA_003384705", "GCA_003384485", "GCA_002572745", "GCA_000144465", "GCA_000145375", "GCA_002775655", "GCA_001660855", "GCA_001660945", "GCA_000144875", "GCA_003426625", "GCA_000240055", "GCA_003426225", "GCA_003384285", "GCA_000144815", "GCA_000145095", "GCA_900102845", "GCA_001469595", "GCA_000145195", "GCA_003384365", "GCA_000145155", "GCA_003384255", "GCA_003384315", "GCA_000144345", "GCA_000240015", "GCA_000302515", "GCA_002831715", "GCA_000144285", "GCA_003426665", "GCA_001469635", "GCA_000144185", "GCA_002572835", "GCA_001660115", "GCA_000144325", "GCA_000144735", "GCA_000194825", "GCA_000376705", "GCA_000147145", "GCA_002572575", "GCA_000144795", "GCA_003384385", "GCA_000144895", "GCA_003425775", "GCA_003384345", "GCA_000231215", "GCA_003384555", "GCA_000735055", "GCA_000145575", "GCA_000144005", "GCA_003384195", "GCA_001469555", "GCA_003384585", "GCA_003384395", "GCA_000145455", "GCA_000144485", "GCA_003384495", "GCA_000342585", "GCA_003384445", "GCA_000144365", "GCA_000730485", "GCA_002861085", "GCA_002572615", "GCA_000144145", "GCA_002556485", "GCA_001481615", "GCA_000144245", "GCA_000144545", "GCA_000221125", "GCA_001469655", "GCA_003426685", "GCA_000144045", "GCA_002572665", "GCA_002572695", "GCA_002572655", "GCA_001750555", "GCA_003426255", "GCA_000145335", "GCA_002572775"], "score": 37.0, "clade": "t__Propionibacterium_phage_PAS50", "len": 663, "taxon": "k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Caudovirales|f__Siphoviridae|g__Pa6virus|s__Propionibacterium_virus_PAS50|t__Propionibacterium_phage_PAS50"}, "386414__D1VYE0__HMPREF9019_1663": {"clade": "s__Prevotella_timonensis", "ext": [], "len": 1065, "score": 0, "taxon": 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_timonensis"}, "29394__H3NGH1__B5772_02200": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 972, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFY1__B8A41_07655": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 534, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NC06__B8A41_08715": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 1317, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDE2__B8A45_00965": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 699, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NFL8__B8A45_06985": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 582, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NDD1__B8A45_02325": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 480, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NE37__B8A45_05170": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 756, "score": 0, "taxon": "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "29394__H3NCU9__B8A45_08665": {"clade": "s__Dolosigranulum_pigrum", "ext": [], "len": 765, "score": 0, "taxon": 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Carnobacteriaceae|g__Dolosigranulum|s__Dolosigranulum_pigrum"}, "2'..b'terobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076675": ["2|1224|1236|91347|543|547|354276|", 4289814], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958695": ["2|1224|1236|91347|543|547|354276|", 4336076], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077255": ["2|1224|1236|91347|543|547|354276|", 4482974], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075885": ["2|1224|1236|91347|543|547|354276|", 4396160], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001487035": ["2|1224|1236|91347|543|547|354276|", 4279410], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900077025": ["2|1224|1236|91347|543|547|354276|", 4303785], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000952555": ["2|1224|1236|91347|543|547|354276|", 4340464], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076755": ["2|1224|1236|91347|543|547|354276|", 4236759], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000783855": ["2|1224|1236|91347|543|547|354276|", 4188638], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022075": ["2|1224|1236|91347|543|547|354276|", 4412384], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001022255": ["2|1224|1236|91347|543|547|354276|", 4385357], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075375": ["2|1224|1236|91347|543|547|354276|", 4164369], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075395": ["2|1224|1236|91347|543|547|354276|", 4013880], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_003261215": ["2|1224|1236|91347|543|547|354276|", 5016502], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900075185": ["2|1224|1236|91347|543|547|354276|", 4392401], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_002192395": ["2|1224|1236|91347|543|547|354276|", 4556770], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_000958745": ["2|1224|1236|91347|543|547|354276|", 4382425], "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_001025055": ["2|1224|1236|91347|543|547|354276|", 4360879], 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacterales|f__Enterobacteriaceae|g__Enterobacter|s__Enterobacter_cloacae_complex|t__GCA_900076455": ["2|1224|1236|91347|543|547|354276|", 4404432]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.1.bt2
b
Binary file test-data/test-db/test-db.1.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.1.bt2l
b
Binary file test-data/test-db/test-db.1.bt2l has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.2.bt2
b
Binary file test-data/test-db/test-db.2.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.2.bt2l
b
Binary file test-data/test-db/test-db.2.bt2l has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.3.bt2
b
Binary file test-data/test-db/test-db.3.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.3.bt2l
b
Binary file test-data/test-db/test-db.3.bt2l has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.4.bt2
b
Binary file test-data/test-db/test-db.4.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.4.bt2l
b
Binary file test-data/test-db/test-db.4.bt2l has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.pkl
b
Binary file test-data/test-db/test-db.pkl has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.1.bt2
b
Binary file test-data/test-db/test-db.rev.1.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.1.bt2l
b
Binary file test-data/test-db/test-db.rev.1.bt2l has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.2.bt2
b
Binary file test-data/test-db/test-db.rev.2.bt2 has changed
b
diff -r a16313517e55 -r 6dee4abadccb test-data/test-db/test-db.rev.2.bt2l
b
Binary file test-data/test-db/test-db.rev.2.bt2l has changed