diff metaphlan.xml @ 0:f5df500fcc3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author iuc
date Mon, 19 Apr 2021 20:56:20 +0000
parents
children b89b0765695d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaphlan.xml	Mon Apr 19 20:56:20 2021 +0000
@@ -0,0 +1,716 @@
+<tool id="metaphlan" name="MetaPhlAn" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>to profile the composition of microbial communities</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <version_command>metaphlan -v</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+## ---------------------------------------------------------------------------
+## 1. Stage raw reads.
+##    Inputs whose datatype extension ends in gz or bz2 are decompressed into
+##    the working directory. file_path collects the path(s) handed to
+##    metaphlan (comma separated when there are several) and full_ext keeps
+##    the Galaxy datatype extension used below to derive the input type.
+## ---------------------------------------------------------------------------
+#if $inputs.in.selector == "raw"
+    #if $inputs.in.raw_in.selector == "single"
+        #set full_ext=$inputs.in.raw_in.in.datatype.file_ext
+        #if $full_ext.endswith("gz")
+            #set $file_path="in"
+zcat '$inputs.in.raw_in.in' > '$file_path'
+&&
+        #else if $full_ext.endswith("bz2")
+            #set $file_path="in"
+bzcat '$inputs.in.raw_in.in' > '$file_path'
+&&
+        #else
+            #set $file_path=$inputs.in.raw_in.in
+        #end if
+    #else if $inputs.in.raw_in.selector == "multiple"
+        ## The datatype of the first file is the reference for all the others.
+        #set full_ext=$inputs.in.raw_in.in[0].datatype.file_ext
+        #set file_path=""
+        #set sep=""
+        #for $i, $f in enumerate($inputs.in.raw_in.in)
+            #if $f.datatype.file_ext != $full_ext
+## Abort on mixed datatypes. The trailing && keeps exit a command of its own
+## instead of letting the next command become extra arguments to it.
+echo "Different datatypes for input files" >&2
+&&
+exit 1
+&&
+            #end if
+            #if $full_ext.endswith("gz")
+                #set fp="input_%s" % ($i)
+zcat '$f' > '$fp'
+&&
+            #else if $full_ext.endswith("bz2")
+                #set fp="input_%s" % ($i)
+bzcat '$f' > '$fp'
+&&
+            #else
+                #set fp=$f
+            #end if
+            #set $file_path+="%s%s" % ($sep, $fp)
+            #set $sep=","
+        #end for
+    #else if $inputs.in.raw_in.selector == "paired"
+        #set full_ext=$inputs.in.raw_in.in_f.datatype.file_ext
+        #if $full_ext != $inputs.in.raw_in.in_r.datatype.file_ext
+## Forward and reverse files must have the same datatype; abort otherwise.
+echo "Different datatypes for input paired-end files" >&2
+&&
+exit 1
+&&
+        #end if
+        #if $full_ext.endswith("gz")
+zcat '$inputs.in.raw_in.in_f' > 'in_f'
+&&
+zcat '$inputs.in.raw_in.in_r' > 'in_r'
+&&
+            #set file_path="in_f,in_r"
+        #else if $full_ext.endswith("bz2")
+bzcat '$inputs.in.raw_in.in_f' > 'in_f'
+&&
+bzcat '$inputs.in.raw_in.in_r' > 'in_r'
+&&
+            #set file_path="in_f,in_r"
+        #else
+            #set file_path="%s,%s" % ($inputs.in.raw_in.in_f,$inputs.in.raw_in.in_r)
+        #end if
+    #end if
+
+    ## Reduce the Galaxy datatype extension to the value given to input_type:
+    ## any fastq variant becomes fastq, compressed fasta becomes fasta, and
+    ## every other extension is passed through unchanged.
+    #if $full_ext.startswith("fastq")
+        #set ext='fastq'
+    #else if $full_ext.startswith("fasta") and $full_ext.endswith(("gz","bz2"))
+        #set ext='fasta'
+    #else
+        #set ext=$full_ext
+    #end if
+#end if
+
+## ---------------------------------------------------------------------------
+## 2. Database from history: build a bowtie2 index from the FASTA file and
+##    convert the JSON metadata to a pickle with the bundled helper script,
+##    both under ref_db/ with the custom_db prefix.
+## ---------------------------------------------------------------------------
+#if $inputs.db.db_selector == "history"
+mkdir 'ref_db'
+&&
+bowtie2-build '$inputs.db.bowtie2db' 'ref_db/custom_db'
+&&
+python '$__tool_directory__/customizemetadata.py'
+    transform_json_to_pkl
+    --json '$inputs.db.mpa_pkl'
+    --pkl 'ref_db/custom_db.pkl'
+&&
+#end if
+
+## ---------------------------------------------------------------------------
+## 3. MetaPhlAn call.
+## ---------------------------------------------------------------------------
+metaphlan
+#if $inputs.in.selector == "raw"
+    '$file_path'
+    --input_type '$ext'
+    --read_min_len $inputs.in.read_min_len
+    --bt2_ps '$inputs.in.mapping.bt2_ps'
+    --min_mapq_val $inputs.in.mapping.min_mapq_val
+#else
+    ## For sam and bowtie2out inputs the selector value doubles as the input type.
+    '$inputs.in.in'
+    --input_type '$inputs.in.selector'
+#end if
+#if $inputs.db.db_selector == "cached"
+    --bowtie2db '$inputs.db.cached_db.fields.path'
+    --index '$inputs.db.cached_db.fields.dbkey'
+#else
+    --bowtie2db 'ref_db/'
+    --index 'custom_db'
+#end if
+    -t '$analysis.analysis_type.t'
+## Options that only exist for specific analysis types; optional values are
+## emitted only when the user filled them in.
+#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
+    --tax_lev '$analysis.analysis_type.tax_lev'
+#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
+    --clade '$analysis.analysis_type.clade'
+    #if str($analysis.analysis_type.min_ab) != ''
+    --min_ab $analysis.analysis_type.min_ab
+    #end if
+#else if $analysis.analysis_type.t == "marker_ab_table" and str($analysis.analysis_type.nreads) != ''
+    --nreads $analysis.analysis_type.nreads
+#else if $analysis.analysis_type.t == "marker_pres_table" and str($analysis.analysis_type.pres_th) != ''
+    --pres_th $analysis.analysis_type.pres_th
+#end if
+    --min_cu_len $analysis.min_cu_len
+#if str($analysis.min_alignment_len) != ''
+    --min_alignment_len $analysis.min_alignment_len
+#end if
+## organism_profiling is a multi-select; each selected value maps to one flag.
+#if 'add_viruses' in $analysis.organism_profiling
+    --add_viruses
+#end if
+#if 'ignore_eukaryotes' in $analysis.organism_profiling
+    --ignore_eukaryotes
+#end if
+#if 'ignore_bacteria' in $analysis.organism_profiling
+    --ignore_bacteria
+#end if
+#if 'ignore_archaea' in $analysis.organism_profiling
+    --ignore_archaea
+#end if
+    --stat '$analysis.stat'
+    --stat_q $analysis.stat_q
+    --perc_nonzero $analysis.perc_nonzero
+#if $analysis.ignore_markers
+    --ignore_markers '$analysis.ignore_markers'
+#end if
+## The boolean parameters below expand to their flag or to an empty string.
+    $analysis.avoid_disqm
+    --sample_id_key '$out.sample_id_key'
+    --sample_id '$out.sample_id'
+    $out.use_group_representative
+    $out.legacy_output
+    $out.CAMI_format_output
+    $out.unknown_estimation
+    -o '$output_file'
+    --bowtie2out 'bowtie2out'
+    -s '$sam_output_file'
+    --biom '$biom_output_file'
+    --nproc \${GALAXY_SLOTS:-4}
+
+## The intermediate mapping is written to a fixed name in the working
+## directory; for raw inputs it is then moved to the Galaxy output dataset.
+#if $inputs.in.selector == "raw"
+&&
+mv 'bowtie2out' '$bowtie2out'
+#end if
+    ]]></command>
+    <inputs>
+        <section name="inputs" title="Inputs" expanded="true">
+            <!-- Kind of input: raw reads, an external SAM mapping, or the
+                 intermediary mapping file of a previous MetaPhlAn run -->
+            <conditional name="in">
+                <param name="selector" type="select" label="Input(s)">
+                    <option value="raw" selected="true">Fasta/FastQ file(s) with metagenomic reads</option>
+                    <option value="sam">Externally BowTie2-mapped SAM file</option>
+                    <option value="bowtie2out">Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run</option>
+                </param>
+                <when value="raw">
+                    <!-- Layout of the raw reads: one file, several single-end files, or a forward/reverse pair -->
+                    <conditional name="raw_in">
+                        <param name="selector" type="select" label="Fasta/FastQ file(s) with metagenomic reads">
+                            <option value="single" selected="true">One single-end file</option>
+                            <option value="multiple">Multiple single-end files</option>
+                            <option value="paired">Paired-end files</option>
+                        </param>
+                        <when value="single">
+                            <param name="in" type="data" format="@FILE_FORMATS@" label="Single-end Fasta/FastQ file with metagenomic reads"/>
+                        </when>
+                        <when value="multiple">
+                            <param name="in" type="data" format="@FILE_FORMATS@" multiple="true" label="Single-end Fasta/FastQ files with metagenomic reads"/>
+                        </when>
+                        <when value="paired">
+                            <param name="in_f" type="data" format="@FILE_FORMATS@" label="Forward paired-end Fasta/FastQ file with metagenomic reads"/>
+                            <param name="in_r" type="data" format="@FILE_FORMATS@" label="Reverse paired-end Fasta/FastQ file with metagenomic reads"/>
+                        </when>
+                    </conditional>
+                    <param argument="--read_min_len" type="integer" value="70" label="Minimum length of the reads to be considered when parsing the input file"/>
+                    <section name="mapping" title="Mapping" expanded="true">
+                        <!-- The option values are handed verbatim to the bt2_ps argument of
+                             metaphlan, so they must spell the preset names exactly -->
+                        <param argument="--bt2_ps" type="select" label="Presets options for BowTie2" help="Applied only with FASTA files">
+                            <option value="sensitive">Sensitive</option>
+                            <option value="very-sensitive" selected="true">Very sensitive</option>
+                            <option value="sensitive-local">Sensitive local</option>
+                            <option value="very-sensitive-local">Very sensitive local</option>
+                        </param>
+                        <param argument="--min_mapq_val" type="integer" value="5" label="Minimum mapping quality value (MAPQ)"/>
+                    </section>
+                </when>
+                <when value="sam">
+                    <param name="in" type="data" format="sam" label="Externally BowTie2-mapped SAM file" help="BowTie2 needs to be used first to map metagenom reads"/>
+                </when>
+                <when value="bowtie2out">
+                    <param name="in" type="data" format="tabular" label="Intermediary mapping file of the metagenome generated by a previous MetaPhlAn run" 
+                        help="File needs to be generated with MetaPhlAn versions >3.0"/>
+                </when>
+            </conditional>
+            <!-- Marker database: an entry of the metaphlan_database data table, or a
+                 FASTA file plus JSON metadata taken from the history -->
+            <conditional name="db">
+                <param name="db_selector" type="select" label="Database with clade-specific marker genes">
+                    <option value="cached" selected="true">Locally cached</option>
+                    <option value="history">From history</option>
+                </param>
+                <when value="cached">
+                    <param name="cached_db" label="Cached database with clade-specific marker genes" type="select">
+                        <options from_data_table="metaphlan_database">
+                            <validator message="No MetaPhlAn database is available" type="no_options" />
+                        </options>
+                    </param>
+                </when>
+                <when value="history">
+                    <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
+                    <param argument="--mpa_pkl" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history"/>
+                </when>
+            </conditional>
+        </section>
+        <section name="analysis" title="Analysis" expanded="true">
+            <!-- Analysis type (-t). Some types expose additional, type-specific parameters. -->
+            <conditional name="analysis_type">
+                <param argument="-t" type="select" label="Type of analysis to perform">
+                    <option value="rel_ab" selected="true">rel_ab: Profiling a metagenomes in terms of relative abundances</option>
+                    <option value="rel_ab_w_read_stats">rel_ab_w_read_stats: Profiling a metagenomes in terms of relative abundances and estimate the number of reads comming from each clade</option>
+                    <option value="reads_map">reads_map: Mapping from reads to clades (only reads hitting a marker)</option>
+                    <option value="clade_profiles">clade_profiles: Normalized marker counts for clades with at least a non-null marker</option>
+                    <option value="clade_specific_strain_tracker">clade_specific_strain_tracker: List of markers present for a specific clade and all its subclades</option>
+                    <option value="marker_ab_table">marker_ab_table: Normalized marker counts (only when > 0.0 and normalized by metagenome size if number of reads is specified)</option>
+                    <option value="marker_counts">marker_counts: Non-normalized marker counts (use with extreme caution)</option>
+                    <option value="marker_pres_table">marker_pres_table: List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th</option>
+                </param>
+                <!-- Both relative-abundance types take their extra parameters from the tax_lev macro -->
+                <when value="rel_ab">
+                    <expand macro="tax_lev"/>
+                </when>
+                <when value="rel_ab_w_read_stats">
+                    <expand macro="tax_lev"/>
+                </when>
+                <!-- Types without extra parameters -->
+                <when value="reads_map"/>
+                <when value="clade_profiles"/>
+                <when value="clade_specific_strain_tracker">
+                    <param argument="--clade" type="text" value=""
+                        label="Clade for which to extract list of markers present"
+                        help="Markers are also extracted for subclades"/>
+                    <param argument="--min_ab" type="float" optional="true"
+                        label="The minimum percentage abundance for the clade"/>
+                </when>
+                <when value="marker_ab_table">
+                    <param argument="--nreads" type="integer" optional="true"
+                        label="Total number of reads in the original metagenome"
+                        help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
+                </when>
+                <when value="marker_counts"/>
+                <when value="marker_pres_table">
+                    <param argument="--pres_th" type="integer" optional="true"
+                        label="Threshold for calling a marker present"/>
+                </when>
+            </conditional>
+            <!-- Parameters common to every analysis type -->
+            <param argument="--min_cu_len"
+                type="integer" value="2000"
+                label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances"/>
+            <param argument="--min_alignment_len"
+                type="integer" optional="true"
+                label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded."/>
+            <!-- Multi-select; each option value is named after a metaphlan flag -->
+            <param name="organism_profiling" type="select" multiple="true" optional="true" label="Organisms to profile">
+                <option value="add_viruses" selected="true">Profile viral organisms (add_viruses)</option>
+                <option value="ignore_eukaryotes">Ignore eukaryotic organisms (ignore_eukaryotes)</option>
+                <option value="ignore_bacteria">Ignore bacteria organisms (ignore_bacteria)</option>
+                <option value="ignore_archaea">Ignore archea organisms (ignore_archaea)</option>
+            </param>
+            <param argument="--stat" type="select"
+                label="Statistical approach for converting marker abundances into clade abundances">
+                <option value="avg_g">avg_g: Clade global (i.e. normalizing all markers together) average (avg_g)</option>
+                <option value="avg_l">avg_l: Average of length-normalized marker counts</option>
+                <option value="tavg_g" selected="true">tavg_g: Truncated clade global average at --stat_q quantile</option>
+                <option value="tavg_l">tavg_l: Truncated average of length-normalized marker counts (at --stat_q)</option>
+                <option value="wavg_g">wavg_g: Winsorized clade global average (at --stat_q)</option>
+                <option value="wavg_l">wavg_l: Winsorized average of length-normalized marker counts (at --stat_q)</option>
+                <option value="med">med: Median of length-normalized marker counts</option>
+            </param>
+            <param argument="--stat_q" type="float" value="0.2"
+                label="Quantile value for the robust average"/>
+            <param argument="--perc_nonzero" type="float" value="0.33"
+                label="Percentage of markers with a non zero relative abundance for misidentify a species"/>
+            <param argument="--ignore_markers" type="data" format="txt,tabular" optional="true"
+                label="File containing a list of markers to ignore"
+                help="One marker per line"/>
+            <!-- NOTE(review): checked by default, so the flag is emitted unless the user
+                 unticks the box, while the help text recommends keeping the
+                 disambiguation procedure; confirm that this default is intended -->
+            <param argument="--avoid_disqm" type="boolean" checked="true" truevalue="--avoid_disqm" falsevalue=""
+                label="Deactivate the procedure of disambiguating the quasi-markers based on the marker abundance pattern found in the sample?"
+                help="It is generally recommended to keep the disambiguation procedure in order to minimize false positives"/>
+        </section>
+        <section name="out" title="Outputs" expanded="true">
+            <!-- Key and value of the sample identifier -->
+            <param argument="--sample_id_key" type="text" value="SampleID"
+                label="Sample ID key for this analysis"/>
+            <param argument="--sample_id" type="text" value="Metaphlan_Analysis"
+                label="Sample ID for this analysis"/>
+            <!-- Output switches: each one expands to its flag when checked and to an
+                 empty string otherwise; all are off by default -->
+            <param argument="--use_group_representative"
+                type="boolean" checked="false" truevalue="--use_group_representative" falsevalue=""
+                label="Use a species as representative for species groups?"/>
+            <!-- The flag is spelled with a hyphen, hence the explicit parameter name -->
+            <param name="legacy_output" argument="--legacy-output"
+                type="boolean" checked="false" truevalue="--legacy-output" falsevalue=""
+                label="Old MetaPhlAn2 two columns output?"/>
+            <param argument="--CAMI_format_output"
+                type="boolean" checked="false" truevalue="--CAMI_format_output" falsevalue=""
+                label="Report the profiling using the CAMI output format?"/>
+            <param argument="--unknown_estimation"
+                type="boolean" checked="false" truevalue="--unknown_estimation" falsevalue=""
+                label="Scale relative abundances to the number of reads mapping to known clades in order to estimate unknowness?"/>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- Always produced: the abundance table and its BIOM counterpart -->
+        <data name="output_file" format="tabular"
+            label="${tool.name} on ${on_string}: Predicted taxon relative abundances"/>
+        <data name="biom_output_file" format="biom1"
+            label="${tool.name} on ${on_string}: BIOM file"/>
+        <!-- Only kept when the input selector is "raw" -->
+        <data name="bowtie2out" format="tabular"
+            label="${tool.name} on ${on_string}: Bowtie2 output">
+            <filter>inputs['in']['selector'] == "raw"</filter>
+        </data>
+        <data name="sam_output_file" format="sam"
+            label="${tool.name} on ${on_string}: SAM file">
+            <filter>inputs['in']['selector'] == "raw"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="4">
+            <!-- Raw input: one gzip-compressed FASTA file, cached test database.
+                 All four outputs are compared by size and checked for marker text. -->
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <!-- Single GZ file -->
+                        <param name="selector" value="single"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                </assert_contents>
+            </output>
+            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
+                    <has_text text="37637__U2I1U8__N579_01580"/>
+                </assert_contents>
+            </output>
+            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
+                <assert_contents>
+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
+            <!-- Raw input: the same gzip-compressed FASTA file given twice as multiple
+                 single-end files; the database comes from history files (FASTA plus
+                 JSON metadata). -->
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <!-- Multiple GZ file -->
+                        <param name="selector" value="multiple"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta.gz,SRS014464-Anterior_nares.fasta.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <!-- Local db -->
+                    <param name="db_selector" value="history"/>
+                    <param name="bowtie2db" value="test-db.fasta"/>
+                    <param name="mpa_pkl" value="test-db.json"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
+                    <has_text text="37637__U2I1U8__N579_01580"/>
+                </assert_contents>
+            </output>
+            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
+                <assert_contents>
+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
+            <!-- Raw input: paired-end mode, with the same gzip-compressed FASTA file used
+                 as forward and as reverse file; cached test database. -->
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <!-- Paired GZ file -->
+                        <param name="selector" value="paired"/>
+                        <param name="in_f" value="SRS014464-Anterior_nares.fasta.gz"/>
+                        <param name="in_r" value="SRS014464-Anterior_nares.fasta.gz"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-two-inputs-bowtie2out.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
+                    <has_text text="37637__U2I1U8__N579_01580"/>
+                </assert_contents>
+            </output>
+            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares-two-inputs.sam" compare="sim_size">
+                <assert_contents>
+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- SAM input with the cached test database. The bowtie2out and SAM outputs
+                 are filtered on the "raw" selector, so only two outputs are expected. -->
+            <section name="inputs">
+                <conditional name="in">
+                    <!-- SAM -->
+                    <param name="selector" value="sam"/>
+                    <param name="in" value="SRS014464-Anterior_nares.sam"/>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- Intermediary bowtie2out mapping as input, cached test database. The
+                 bowtie2out and SAM outputs are filtered on the "raw" selector, so only
+                 two outputs are expected. All input parameters live inside the
+                 "inputs" section; the mapping parameters exist for raw reads only and
+                 are therefore not set here. -->
+            <section name="inputs">
+                <conditional name="in">
+                    <!-- bowtie2out -->
+                    <param name="selector" value="bowtie2out"/>
+                    <param name="in" value="SRS014464-Anterior_nares-bowtie2out.tabular"/>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="false"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="relative_abundance"/>
+                    <has_text text="NCBI_tax_id"/>
+                    <has_text text="clade_name"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
+            <!-- Runs on a single raw FASTA file with the cached test database and legacy output enabled; checks the abundance table, bowtie2out, SAM and BIOM outputs. -->
+            <section name="inputs">
+                <conditional name="in">
+                    <param name="selector" value="raw"/>
+                    <conditional name="raw_in">
+                        <!-- Single FASTA file -->
+                        <param name="selector" value="single"/>
+                        <param name="in" value="SRS014464-Anterior_nares.fasta"/>
+                    </conditional>
+                    <param name="read_min_len" value="70"/>
+                    <section name="mapping">
+                        <param name="bt2_ps" value="sensitive"/>
+                        <param name="min_mapq_val" value="5"/>
+                    </section>
+                </conditional>
+                <conditional name="db">
+                    <!-- Cached db -->
+                    <param name="db_selector" value="cached"/>
+                    <param name="cached_db" value="test-db-20210409"/>
+                </conditional>
+            </section>
+            <section name="analysis">
+                <param name="min_cu_len" value="2000"/>
+                <param name="organism_profiling" value="add_viruses"/>
+                <param name="stat" value="avg_g"/>
+                <param name="stat_q" value="0.2"/>
+                <param name="perc_nonzero" value="0.33"/>
+                <param name="ignore_markers" value="marker.txt"/>
+                <param name="avoid_disqm" value="true"/>
+            </section>
+            <section name="out">
+                <param name="sample_id_key" value="SampleID"/>
+                <param name="sample_id" value="Metaphlan_Analysis"/>
+                <param name="use_group_representative" value="false"/>
+                <param name="legacy_output" value="true"/>
+                <param name="CAMI_format_output" value="false"/>
+                <param name="unknown_estimation" value="false"/>
+            </section>
+            <!-- Legacy abundance table: asserts the sample id key and sample id set in the "out" section above appear in the file. -->
+            <output name="output_file" ftype="tabular" file="SRS014464-Anterior_nares-legacy-abundances.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Corynebacteriales|f__Corynebacteriaceae|g__Corynebacterium|s__Corynebacterium_accolens"/>
+                    <has_text text="SampleID"/>
+                    <has_text text="Metaphlan_Analysis"/>
+                </assert_contents>
+            </output>
+            <!-- Intermediate mapping outputs (bowtie2out and SAM), checked by size and one or two expected identifiers. -->
+            <output name="bowtie2out" ftype="tabular" file="SRS014464-Anterior_nares-bowtie2out.tabular" compare="sim_size">
+                <assert_contents>
+                    <has_text text="HWI-EAS109_102883399:3:104:7342:14360/1"/>
+                    <has_text text="37637__U2I1U8__N579_01580"/>
+                </assert_contents>
+            </output>
+            <output name="sam_output_file" ftype="sam" file="SRS014464-Anterior_nares.sam" compare="sim_size">
+                <assert_contents>
+                    <has_text text="SN:13076__A0A2I1PE66__CYJ72_10760"/>
+                </assert_contents>
+            </output>
+            <output name="biom_output_file" ftype="biom1" file="SRS014464-Anterior_nares.biom" compare="sim_size">
+                <assert_contents>
+                    <has_text text="k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Moraxella|s__Moraxella_lacunata"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+
+MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, 
+Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution.
+
+MetaPhlAn relies on ~1.1M unique clade-specific marker genes identified from ~100,000 reference genomes 
+(~99,500 bacterial and archaeal and ~500 eukaryotic), allowing:
+
+- unambiguous taxonomic assignments;
+- accurate estimation of organismal relative abundance;
+- species-level resolution for bacteria, archaea, eukaryotes and viruses;
+- strain identification and tracking;
+- orders of magnitude speedups compared to existing methods;
+- metagenomic strain-level population genomics.
+
+MetaPhlAn clade-abundance estimation
+------------------------------------
+
+The basic usage of MetaPhlAn consists in the identification of the clades (from phyla to species and 
+strains in particular cases) present in the metagenome obtained from a microbiome sample and their 
+relative abundance.
+
+Marker level analysis
+---------------------
+
+MetaPhlAn introduces the capability of characterizing organisms at the strain level using 
+non-aggregated marker information. This capability comes in several slightly different flavours, 
+which are a way to perform strain tracking and comparison across multiple samples.
+
+Usually, MetaPhlAn is first run with the default parameters for the type of analysis to profile the 
+species present in the community, and then a strain-level profiling can be performed to zoom in on 
+specific species of interest. This operation can be performed quickly as it exploits the bowtie2out 
+intermediate file saved during the execution of the default analysis type.
+
+Inputs
+======
+
+MetaPhlAn takes as input either:
+
+- one or several sequence files in FASTA or FASTQ format (compressed or not)
+- a SAM file produced by Bowtie2
+- an intermediary mapping file of the metagenome generated by a previous MetaPhlAn run
+
+It also needs the reference database, which can be locally installed or customized using the dedicated tools.
+
+Outputs
+=======
+
+The main output file is a tab-separated file with the predicted taxon relative abundances.
+
+It also generates a BIOM file and some intermediate files (SAM and BowTie2out) if sequence files are given as inputs.
+
+
+More help and use cases
+=======================
+
+To get more information about MetaPhlAn usage and use cases, please refer to the `Metaphlan documentation`_.
+
+.. _Metaphlan documentation: https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#basic-usage
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>