<!-- Mercurial > repos > iuc > humann -->

<tool id="humann" name="HUMAnN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to profile presence/absence and abundance of microbial pathways and gene families</description>
    <macros>
        <import>macros.xml</import>
        <!-- Inputs of the prescreen step: source of the marker-gene database and the species detection threshold. -->
        <xml name="prescreen">
            <section name="prescreen" title="Prescreen / Identifying community species" expanded="true">
                <conditional name="metaphlan_db">
                    <param name="selector" type="select" label="Database with clade-specific marker genes">
                        <option value="cached" selected="true">Locally cached</option>
                        <option value="history">From history</option>
                    </param>
                    <!-- Database picked from the metaphlan_database data table -->
                    <when value="cached">
                        <param name="cached_db" type="select"
                            label="Cached database with clade-specific marker genes">
                            <options from_data_table="metaphlan_database">
                                <validator type="no_options" message="No MetaPhlAn database is available"/>
                            </options>
                        </param>
                    </when>
                    <!-- Database supplied through the history: marker sequences plus their JSON metadata -->
                    <when value="history">
                        <param argument="--bowtie2db" type="data" format="fasta"
                            label="Database with clade-specific marker genes from history"/>
                        <param argument="--mpa_pkl" type="data" format="json"
                            label="Metadata associate to the database with clade-specific marker genes from history"/>
                    </when>
                </conditional>
                <param argument="--prescreen-threshold" type="float" min="0" max="100" value="0.01"
                    label="Minimum percentage of reads matching a species"/>
                <!-- TODO: add metaphlan options -->
            </section>
        </xml>
        <!-- Shell preamble for the prescreen step. Only emits commands when the marker database comes
             from the history: it creates the metaphlan_db directory, builds a bowtie2 index with the
             prefix custom_db-v30 from the FASTA dataset, and runs customizemetadata.py
             (transform_json_to_pkl) to write custom_db-v30.pkl from the JSON dataset. The emitted
             text ends with a command separator so that whatever follows is chained to it. -->
        <token name="@PRESCREEN_PREPARE@"><![CDATA[
            #if $wf.prescreen.metaphlan_db.selector == "history"
            mkdir metaphlan_db
            &&
            bowtie2-build '$wf.prescreen.metaphlan_db.bowtie2db' 'metaphlan_db/custom_db-v30'
            &&
            python '$__tool_directory__/customizemetadata.py'
                transform_json_to_pkl
                --json '$wf.prescreen.metaphlan_db.mpa_pkl'
                --pkl 'metaphlan_db/custom_db-v30.pkl'
            &&
            #end if
        ]]></token>
        <!-- humann arguments for the prescreen step. Builds the string handed over through the
             metaphlan-options argument: it always starts with "-t rel_ab" and then points the
             bowtie2db and index settings either at the metaphlan_db directory / custom_db-v30 index
             (history database) or at the path and dbkey fields of the selected data table entry
             (cached database). The prescreen threshold is passed through unchanged. -->
        <token name="@PRESCREEN_RUN@"><![CDATA[
            #set $metaphlan_option = "-t rel_ab"
            #if $wf.prescreen.metaphlan_db.selector == "history"
                #set $metaphlan_option += " --bowtie2db metaphlan_db/"
                #set $metaphlan_option += " --index custom_db-v30"
            #else
                #set $metaphlan_option += " --bowtie2db %s" % $wf.prescreen.metaphlan_db.cached_db.fields.path
                #set $metaphlan_option += " --index %s" % $wf.prescreen.metaphlan_db.cached_db.fields.dbkey
            #end if
            --metaphlan-options="$metaphlan_option"
            --prescreen-threshold $wf.prescreen.prescreen_threshold
        ]]></token>
        <!-- Nucleotide database taken from the history: a list collection of FASTA datasets. -->
        <xml name="nucleotide_database">
            <param argument="--nucleotide-database" type="data_collection" collection_type="list" format="fasta"
                label="Nucleotide database from history"
                help="Each file must be named: ^[g__].[s__]"/>
        </xml>
        <!-- Inputs of the nucleotide search step: source of the nucleotide database and the alignment thresholds. -->
        <xml name="nucleotide_search">
            <section name="nucleotide_search" title="Nucleotide search / Mapping reads to community pangenomes" expanded="true">
                <conditional name="nucleotide_db">
                    <param name="selector" type="select" label="Nucleotide database"
                        help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">
                        <option value="cached" selected="true">Locally cached</option>
                        <option value="history">From history</option>
                    </param>
                    <!-- Database picked from the humann_nucleotide_database data table -->
                    <when value="cached">
                        <param name="nucleotide_database" type="select" label="Nucleotide database">
                            <options from_data_table="humann_nucleotide_database">
                                <validator type="no_options" message="No nucleotide database is available"/>
                            </options>
                        </param>
                    </when>
                    <!-- Database supplied through the history -->
                    <when value="history">
                        <expand macro="nucleotide_database"/>
                    </when>
                </conditional>
                <!-- TODO: add bowtie2 options -->
                <param argument="--nucleotide-identity-threshold" type="float" min="0" max="100" value="0"
                    label="Identity threshold for nucleotide alignments"/>
                <param argument="--nucleotide-subject-coverage-threshold" type="float" min="0" max="100" value="50"
                    label="Subject coverage threshold for nucleotide alignments"/>
                <param argument="--nucleotide-query-coverage-threshold" type="float" min="0" max="100" value="90"
                    label="Query coverage threshold for nucleotide alignments"/>
            </section>
        </xml>
        <!-- Shell preamble for the nucleotide search step. Only emits commands when the database comes
             from the history: it creates the nucleotide_db directory and symlinks every element of
             the collection into it. The link name is the element identifier with every character
             outside word characters, dash, underscore and dot replaced by an underscore, followed by
             the suffix .v296_201901b. The template uses the re module, so the enclosing command
             has to import it. Each emitted command ends with a separator so that whatever follows
             is chained to it. -->
        <token name="@NUCLEOTIDE_SEARCH_PREPARE@"><![CDATA[
            #if $wf.nucleotide_search.nucleotide_db.selector == 'history'
            mkdir nucleotide_db
            &&
                #for $f in $wf.nucleotide_search.nucleotide_db.nucleotide_database:
            ln -s '$f' 'nucleotide_db/${re.sub('[^\w\-_.]', '_', f.element_identifier)}.v296_201901b' &&
                #end for
            #end if
        ]]></token>
        <!-- humann arguments for the nucleotide search step. The database location is the local
             nucleotide_db directory for a history database, otherwise the path field of the
             selected data table entry. The three nucleotide alignment thresholds are passed
             through unchanged. -->
        <token name="@NUCLEOTIDE_SEARCH_RUN@"><![CDATA[
            #if $wf.nucleotide_search.nucleotide_db.selector == 'history'
            --nucleotide-database nucleotide_db
            #else
            --nucleotide-database '$wf.nucleotide_search.nucleotide_db.nucleotide_database.fields.path'
            #end if
            --nucleotide-identity-threshold $wf.nucleotide_search.nucleotide_identity_threshold
            --nucleotide-subject-coverage-threshold $wf.nucleotide_search.nucleotide_subject_coverage_threshold
            --nucleotide-query-coverage-threshold $wf.nucleotide_search.nucleotide_query_coverage_threshold
            ]]></token>
        <!-- Inputs of the translated search step: source of the protein database, search mode and alignment thresholds. -->
        <xml name="translated_search">
            <section name="translated_search" title="Translated search / Aligning unmapped reads to a protein database" expanded="true">
                <conditional name="protein_db">
                    <param name="selector" type="select" label="Protein database"
                        help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">
                        <option value="cached" selected="true">Locally cached</option>
                        <option value="history">From history</option>
                    </param>
                    <!-- Database picked from the humann_protein_database data table -->
                    <when value="cached">
                        <param name="protein_database" type="select" label="Protein database">
                            <options from_data_table="humann_protein_database">
                                <validator type="no_options" message="No protein database is available"/>
                            </options>
                        </param>
                    </when>
                    <!-- Database supplied through the history; the search mode has to be chosen explicitly -->
                    <when value="history">
                        <param argument="--protein-database" type="data" format="fasta"
                            label="Protein database from history"/>
                        <param argument="--search-mode" type="select" label="Search mode">
                            <option value="uniref50">UniRef50</option>
                            <option value="uniref90" selected="true">UniRef90</option>
                        </param>
                    </when>
                </conditional>
                <param argument="--evalue" type="float" value="1"
                    label="E-value threshold to use with the translated search"/>
                <!-- TODO: add diamond options -->
                <param argument="--translated-identity-threshold" type="float" optional="true" min="0" max="100"
                    label="Identity threshold for translated alignments"
                    help="It is tuned automatically (based on uniref mode) unless a custom value is specified"/>
                <param argument="--translated-subject-coverage-threshold" type="float" min="0" max="100" value="50"
                    label="Subject coverage threshold for translated alignments"/>
                <param argument="--translated-query-coverage-threshold" type="float" min="0" max="100" value="90"
                    label="Query coverage threshold for translated alignments"/>
            </section>
        </xml>
        <!-- Shell preamble for the translated search step. Only emits commands when the protein
             database comes from the history: it creates the protein_db directory and runs
             diamond makedb on the FASTA dataset, writing the database as protein_db/protein-db-201901b
             with the number of threads taken from GALAXY_SLOTS (4 when unset). The emitted text
             ends with a command separator so that whatever follows is chained to it. -->
        <token name="@TRANSLATED_SEARCH_PREPARE@"><![CDATA[
            #if $wf.translated_search.protein_db.selector == 'history'
            mkdir protein_db
            &&
            diamond makedb
                --in '$wf.translated_search.protein_db.protein_database'
                --db 'protein_db/protein-db-201901b'
                --threads "\${GALAXY_SLOTS:-4}"
            &&
            #end if
        ]]></token>
        <!-- humann arguments for the translated search step (always run with diamond).
             History database: uses the local protein_db directory and the search mode chosen by
             the user. Cached database: uses the path field of the data table entry and derives the
             search mode from its value field (uniref50 when the value contains that string,
             uniref90 otherwise). The identity threshold is only passed when the user filled it in,
             under the same option name as declared on the corresponding input parameter. -->
        <token name="@TRANSLATED_SEARCH_RUN@"><![CDATA[
            --translated-alignment 'diamond'
            #if $wf.translated_search.protein_db.selector == 'history'
            --protein-database protein_db
            --search-mode '$wf.translated_search.protein_db.search_mode'
            #else
            --protein-database '$wf.translated_search.protein_db.protein_database.fields.path'
                #if 'uniref50' in $wf.translated_search.protein_db.protein_database.fields.value
            --search-mode 'uniref50'
                #else
            --search-mode 'uniref90'
                #end if
            #end if
            --evalue $wf.translated_search.evalue
            #if str($wf.translated_search.translated_identity_threshold) != ''
            --translated-identity-threshold $wf.translated_search.translated_identity_threshold
            #end if
            --translated-subject-coverage-threshold $wf.translated_search.translated_subject_coverage_threshold
            --translated-query-coverage-threshold $wf.translated_search.translated_query_coverage_threshold
        ]]></token>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
## The re module is used by @NUCLEOTIDE_SEARCH_PREPARE@ to sanitize element identifiers.
#import re

## Translate the Galaxy datatype of the input into a value for `humann --input-format`.
#if $in.input.ext.startswith("fasta")
    #set ext="fasta"
#else if $in.input.ext.startswith("fastq")
    #set ext="fastq"
#else if $in.input.ext.endswith("bam")
    #set ext="bam"
#else if $in.input.ext == 'sam'
    #set ext="sam"
#else if $in.input.ext == 'biom1'
    #set ext="biom"
#else if $in.selector == 'abundance'
    ## Tabular abundance tables (everything but biom1 in that branch) are gene tables for HUMAnN.
    #set ext="genetable"
#else
    ## Keep ext defined so the template still renders, then report the problem and stop the job.
    #set ext="unknown"
    echo "unknown extension $in.input.ext" >&2;
    exit 1;
#end if
#if $in.input.ext.endswith(".gz")
    #set ext+=".gz"
#end if

## Preparation commands (database building / linking) for the steps that will actually run.
#if $wf.selector == 'bypass_prescreen'
    @NUCLEOTIDE_SEARCH_PREPARE@
    @TRANSLATED_SEARCH_PREPARE@
#else if $wf.selector == 'bypass_taxonomic_profiling'
    @NUCLEOTIDE_SEARCH_PREPARE@
    @TRANSLATED_SEARCH_PREPARE@
#else if $wf.selector == 'bypass_nucleotide_index'
    @NUCLEOTIDE_SEARCH_PREPARE@
    @TRANSLATED_SEARCH_PREPARE@
#else if $wf.selector == 'bypass_nucleotide_search'
    @TRANSLATED_SEARCH_PREPARE@
#else if $wf.selector == 'bypass_translated_search'
    @PRESCREEN_PREPARE@
    @NUCLEOTIDE_SEARCH_PREPARE@
#else if $wf.selector == 'none'
    @PRESCREEN_PREPARE@
    @NUCLEOTIDE_SEARCH_PREPARE@
    @TRANSLATED_SEARCH_PREPARE@
#end if

humann
    --input '$in.input'
    --input-format $ext
    -o 'output'
## Step-specific arguments, mirroring the preparation block above.
#if $wf.selector == 'bypass_prescreen'
    --bypass-prescreen
    @NUCLEOTIDE_SEARCH_RUN@
    @TRANSLATED_SEARCH_RUN@
#else if $wf.selector == 'bypass_taxonomic_profiling'
    --taxonomic-profile '$wf.taxonomic_profile'
    @NUCLEOTIDE_SEARCH_RUN@
    @TRANSLATED_SEARCH_RUN@
#else if $wf.selector == 'bypass_nucleotide_index'
    --bypass-nucleotide-index
    @NUCLEOTIDE_SEARCH_RUN@
    @TRANSLATED_SEARCH_RUN@
#else if $wf.selector == 'bypass_nucleotide_search'
    --bypass-nucleotide-search
    @TRANSLATED_SEARCH_RUN@
#else if $wf.selector == 'bypass_translated_search'
    --bypass-translated-search
    @PRESCREEN_RUN@
    @NUCLEOTIDE_SEARCH_RUN@
#else if $wf.selector == 'none'
    @PRESCREEN_RUN@
    @NUCLEOTIDE_SEARCH_RUN@
    @TRANSLATED_SEARCH_RUN@
#end if
    --gap-fill '$g_p_quant.gap_fill'
    --minpath '$g_p_quant.minpath'
    --pathways '$g_p_quant.pathways'
    --xipe '$g_p_quant.xipe'
    --annotation-gene-index $g_p_quant.annotation_gene_index
#if $g_p_quant.id_mapping
    --id-mapping '$g_p_quant.id_mapping'
#end if
    --log-level 'DEBUG'
    --o-log '$log'
    --output-basename '$out.output_basename'
    --output-format '$out.output_format'
    --output-max-decimals $out.output_max_decimals
    $out.remove_column_description_output
    $out.remove_stratified_output
    --threads "\${GALAXY_SLOTS:-4}"
    --memory-use minimum
    ]]></command>
    <inputs>
        <!-- Kind of data handed to the tool; each branch restricts the accepted datatypes -->
        <conditional name="in">
            <param name="selector" type="select" label="Input(s)">
                <option value="raw" selected="true">Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))</option>
                <option value="mapping">Pre-computed mappings of reads to database sequences</option>
                <option value="abundance">Pre-computed (typically gene) abundance tables</option>
            </param>
            <!-- Sequencing reads -->
            <when value="raw">
                <param name="input" type="data" format="fastq,fastq.gz,fasta,fasta.gz"
                    label="Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))"
                    help="Paired-end Fasta/FastQ files should be merged first"/>
            </when>
            <!-- Alignments -->
            <when value="mapping">
                <param name="input" type="data" format="sam,bam"
                    label="Pre-computed mappings of reads to database sequences"/>
            </when>
            <!-- Abundance tables -->
            <when value="abundance">
                <param name="input" type="data" format="tabular,tsv,biom1"
                    label="Pre-computed (typically gene) abundance tables"/>
            </when>
        </conditional>
        <!-- Selection of the workflow steps to run. Every branch expands the input macros of the
             steps that remain active, so the parameters seen by the command template are always
             nested below wf. -->
        <conditional name="wf">
            <param name="selector" type="select" label="Steps">
                <option value="bypass_prescreen">Bypass the prescreen step and run on the full ChocoPhlAn database (--bypass-prescreen)</option>
                <option value="bypass_taxonomic_profiling">Bypass the taxonomic profiling step and creates a custom ChocoPhlAn database of the species provided afterwards</option>
                <option value="bypass_nucleotide_index">Starts the workflow with the nucleotide alignment step using the provided indexed database (--bypass-nucleotide-index)</option>
                <option value="bypass_nucleotide_search">Bypass all of the alignment steps before the translated search (--bypass-nucleotide-search)</option>
                <option value="bypass_translated_search">Run all of the alignment steps except the translated search (--bypass-translated-search)</option>
                <option value="none" selected="true">Run the full workflow steps</option>
            </param>
            <when value="bypass_prescreen">
                <expand macro="nucleotide_search"/>
                <expand macro="translated_search"/>
            </when>
            <when value="bypass_taxonomic_profiling">
                <param argument="--taxonomic-profile" type="data" format="tabular,txt,tsv" label="Taxonomic profile file"/>
                <expand macro="nucleotide_search"/>
                <expand macro="translated_search"/>
            </when>
            <when value="bypass_nucleotide_index">
                <expand macro="nucleotide_search"/>
                <expand macro="translated_search"/>
            </when>
            <when value="bypass_nucleotide_search">
                <expand macro="translated_search"/>
            </when>
            <when value="bypass_translated_search">
                <expand macro="prescreen"/>
                <expand macro="nucleotide_search"/>
            </when>
            <when value="none">
                <expand macro="prescreen"/>
                <expand macro="nucleotide_search"/>
                <expand macro="translated_search"/>
            </when>
        </conditional>
        <!-- Settings of the gene and pathway quantification; the boolean switches render as on / off -->
        <section name="g_p_quant" title="Gene and pathway quantification" expanded="true">
            <param argument="--gap-fill" type="boolean" checked="true" truevalue="on" falsevalue="off"
                label="Use gap fill computation?"/>
            <param argument="--minpath" type="boolean" checked="true" truevalue="on" falsevalue="off"
                label="Use minpath computation?"/>
            <param argument="--pathways" type="select" label="Database to use for pathway computations">
                <option value="metacyc" selected="true">MetaCyc</option>
                <option value="unipathway">UniPathway</option>
            </param>
            <param argument="--xipe" type="boolean" checked="false" truevalue="on" falsevalue="off"
                label="Use xipe computation?"/>
            <param argument="--annotation-gene-index" type="integer" value="3"
                label="Index of the gene in the sequence annotation"/>
            <param argument="--id-mapping" type="data" format="tsv" optional="true"
                label="id mapping file for alignments"/>
        </section>
        <!-- Naming and formatting of the results, plus the optional intermediate files to keep -->
        <section name="out" title="Outputs" expanded="true">
            <!-- The basename is restricted to letters, digits, underscore and dash; other characters are dropped -->
            <param argument="--output-basename" type="text" value="humann" label="basename">
                <sanitizer invalid_char="">
                    <valid initial="string.ascii_letters,string.digits">
                        <add value="_"/>
                        <add value="-"/>
                    </valid>
                </sanitizer>
                <validator type="empty_field"/>
            </param>
            <param argument="--output-format" type="select" label="Format of the output files">
                <option value="tsv" selected="true">Tabular</option>
                <option value="biom">BIOM</option>
            </param>
            <param argument="--output-max-decimals" type="integer" value="10"
                label="Number of decimals to output"/>
            <param argument="--remove-column-description-output" type="boolean" checked="false"
                truevalue="--remove-column-description-output" falsevalue=""
                label="Remove description in the output column?"/>
            <param argument="--remove-stratified-output" type="boolean" checked="false"
                truevalue="--remove-stratified-output" falsevalue=""
                label="Remove stratification from output?"/>
            <!-- Each selected value enables the output dataset of the same name -->
            <param name="intermediate_temp" type="select" multiple="true" label="Intermediate output files">
                <option value="metaphlan_bowtie2">MetaPhlAn Bowtie2 output</option>
                <option value="metaphlan_bugs_list">MetaPhlAn bugs list</option>
                <option value="bowtie2_alignment">Bowtie2 alignment results</option>
                <option value="bowtie2_reduced_alignment">Bowtie2 reduced alignment results</option>
                <option value="bowtie2_unaligned">Unaligned reads after Bowtie2</option>
                <option value="custom_chocophlan_database">Custom ChocoPhlAn database</option>
                <option value="diamond_aligned">Translated alignment results</option>
                <option value="diamond_unaligned">Translated alignment unaligned reads</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Main results: one dataset per result type and output format, enabled by the chosen format -->
        <data name="gene_families_tsv" format="tabular" from_work_dir="output/*_genefamilies.tsv"
            label="${tool.name} on ${on_string}: Gene families and their abundance">
            <filter>out['output_format'] == "tsv"</filter>
        </data>
        <data name="gene_families_biom" format="biom1" from_work_dir="output/*_genefamilies.biom"
            label="${tool.name} on ${on_string}: Gene families and their abundance">
            <filter>out['output_format'] == "biom"</filter>
        </data>
        <data name="pathcoverage_tsv" format="tabular" from_work_dir="output/*_pathcoverage.tsv"
            label="${tool.name} on ${on_string}: Pathways and their coverage">
            <filter>out['output_format'] == "tsv"</filter>
        </data>
        <data name="pathcoverage_biom" format="biom1" from_work_dir="output/*_pathcoverage.biom"
            label="${tool.name} on ${on_string}: Pathways and their coverage">
            <filter>out['output_format'] == "biom"</filter>
        </data>
        <data name="pathabundance_tsv" format="tabular" from_work_dir="output/*_pathabundance.tsv"
            label="${tool.name} on ${on_string}: Pathways and their abundance">
            <filter>out['output_format'] == "tsv"</filter>
        </data>
        <data name="pathabundance_biom" format="biom1" from_work_dir="output/*_pathabundance.biom"
            label="${tool.name} on ${on_string}: Pathways and their abundance">
            <filter>out['output_format'] == "biom"</filter>
        </data>
        <!-- Log file written by the tool -->
        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log"/>
        <!-- Intermediate files: each one is kept only when selected in the intermediate_temp parameter -->
        <data name="metaphlan_bowtie2" format="tabular" from_work_dir="output/*_temp/*_metaphlan_bowtie2.txt"
            label="${tool.name} on ${on_string}: MetaPhlAn Bowtie2 output">
            <filter>out['intermediate_temp'] and "metaphlan_bowtie2" in out['intermediate_temp']</filter>
        </data>
        <data name="metaphlan_bugs_list" format="tabular" from_work_dir="output/*_temp/*_metaphlan_bugs_list.tsv"
            label="${tool.name} on ${on_string}: MetaPhlAn bugs list">
            <filter>out['intermediate_temp'] and "metaphlan_bugs_list" in out['intermediate_temp']</filter>
        </data>
        <data name="bowtie2_alignment" format="sam" from_work_dir="output/*_temp/*_bowtie2_aligned.sam"
            label="${tool.name} on ${on_string}: Bowtie2 alignment results">
            <filter>out['intermediate_temp'] and "bowtie2_alignment" in out['intermediate_temp']</filter>
        </data>
        <data name="bowtie2_reduced_alignment" format="tabular" from_work_dir="output/*_temp/*_bowtie2_aligned.tsv"
            label="${tool.name} on ${on_string}: Bowtie2 reduced alignment results">
            <filter>out['intermediate_temp'] and "bowtie2_reduced_alignment" in out['intermediate_temp']</filter>
        </data>
        <data name="bowtie2_unaligned" format="fasta" from_work_dir="output/*_temp/*_bowtie2_unaligned.fa"
            label="${tool.name} on ${on_string}: Unaligned reads after Bowtie2">
            <filter>out['intermediate_temp'] and "bowtie2_unaligned" in out['intermediate_temp']</filter>
        </data>
        <data name="custom_chocophlan_database" format="fasta" from_work_dir="output/*_temp/*_custom_chocophlan_database.ffn"
            label="${tool.name} on ${on_string}: Custom ChocoPhlAn database">
            <filter>out['intermediate_temp'] and "custom_chocophlan_database" in out['intermediate_temp']</filter>
        </data>
        <data name="diamond_aligned" format="tabular" from_work_dir="output/*_temp/*_diamond_aligned.tsv"
            label="${tool.name} on ${on_string}: Translated alignment results">
            <filter>out['intermediate_temp'] and "diamond_aligned" in out['intermediate_temp']</filter>
        </data>
        <data name="diamond_unaligned" format="fasta" from_work_dir="output/*_temp/*_diamond_unaligned.fa"
            label="${tool.name} on ${on_string}: Translated alignment unaligned reads">
            <filter>out['intermediate_temp'] and "diamond_unaligned" in out['intermediate_temp']</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="12">
            <conditional name="in">
                <!-- raw fasta file -->
                <param name="selector" value="raw"/>
                <param name="input" value="demo.fastq.gz"/>
            </conditional>
            <conditional name="wf">
                <!-- full workflow -->
                <param name="selector" value="none"/>
                <section name="prescreen">
                    <conditional name="metaphlan_db">
                        <param name="selector" value="history"/>
                        <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/>
                        <param name="mpa_pkl" value="test-db/metaphlan-db/demo-db-v30.json"/>
                    </conditional>
                    <param name="prescreen_threshold" value="0.01"/>
                </section>
                <section name="nucleotide_search">
                    <conditional name="nucleotide_db">
                        <param name="selector" value="history"/>
                        <param name="nucleotide_database">
                            <collection type="list">
                                <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz" />
                                <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz" />
                            </collection>
                        </param>
                    </conditional>
                    <param name="nucleotide_identity_threshold" value="0"/>
                    <param name="nucleotide_subject_coverage_threshold" value="50"/>
                    <param name="nucleotide_query_coverage_threshold" value="90"/>
                </section>
                <section name="translated_search">
                    <conditional name="protein_db">
                        <param name="selector" value="history"/>
                        <param name="protein_database" value="test-db/protein-db/uniref90_demo_prots_v201901b.fasta"/>
                        <param name="search_mode" value="uniref90"/>
                    </conditional>
                    <param name="evalue" value="1"/>
                    <param name="translated_subject_coverage_threshold" value="50"/>
                    <param name="translated_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <!-- intermediate files -->
                <param name="output_basename" value="humann"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="tsv"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp"
                    value="metaphlan_bowtie2,metaphlan_bugs_list,bowtie2_alignment,bowtie2_reduced_alignment,bowtie2_unaligned,custom_chocophlan_database,diamond_aligned,diamond_unaligned"/>
            </section>
            <output name="gene_families_tsv" ftype="tabular" value="demo_genefamilies.tsv" compare="sim_size">
                <assert_contents>
                    <has_text text="humann_Abundance-RPKs"/>
                    <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathcoverage_tsv" ftype="tabular" value="demo_pathcoverage.tsv" compare="sim_size">
                <assert_contents>
                    <has_text text="humann_Coverage"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathabundance_tsv" ftype="tabular" value="demo_pathabundance.tsv" compare="sim_size">
                <assert_contents>
                    <has_text text="humann_Abundance"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="DATABASE SETTINGS"/>
                    <has_text text="humann.utilities"/>
                    <has_text text="humann_genefamilies"/>
                    <has_text text="humann_pathabundance"/>
                    <has_text text="humann_pathcoverage"/>
                    <has_text text="g__Bacteroides.s__Bacteroides_dorei"/>
                </assert_contents>
            </output>
            <output name="metaphlan_bowtie2" ftype="tabular">
                <assert_contents>
                    <has_text text="s__Bacteroides_dorei_read000116"/>
                    <has_text text="357276__I9R1V6__DXD47_04125"/>
                    <has_text text="s__Bacteroides_dorei_read000129"/>
                    <has_text text="357276__B6W1Y5__IY41_11405"/>
                </assert_contents>
            </output>
            <output name="metaphlan_bugs_list" ftype="tabular">
                <assert_contents>
                    <has_text text="relative_abundance"/>
                    <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus"/>
                    <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei"/>
                </assert_contents>
            </output>
            <output name="bowtie2_alignment" ftype="sam">
                <assert_contents>
                    <has_text text="SN:821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
                    <has_text text="s__Bacteroides_dorei_read009840"/>
                    <has_text text="PN:bowtie2"/>
                    <has_text text="LN:1281"/>
                </assert_contents>
            </output>
            <output name="bowtie2_reduced_alignment" ftype="tabular">
                <assert_contents>
                    <has_text text="s__Bacteroides_dorei_read000001"/>
                    <has_text text="821__A6L5K0__BVU_3338|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L5K0|UniRef50_A6L5K0|468"/>
                    <has_text text="s__Bacteroides_vulgatus_read003845"/>
                    <has_text text="821__A0A396BBC3__DXC03_14350|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A396BBC3|UniRef50_A0A174FNA3|2934"/>
                </assert_contents>
            </output>
            <output name="bowtie2_unaligned" ftype="fasta">
                <assert_contents>
                    <has_text text=">s__Bacteroides_dorei_read000001|100"/>
                    <has_text text=">s__Bacteroides_dorei_read000002|100"/>
                    <has_text text=">unclassified_read000971|100"/>
                    <has_text text=">s__Bacteroides_vulgatus_read004473|100"/>
                </assert_contents>
            </output>
            <output name="custom_chocophlan_database" ftype="fasta">
                <!-- Spot-check FASTA header lines of three Bacteroides vulgatus entries
                     expected in the custom_chocophlan_database output -->
                <assert_contents>
                    <has_text text=">821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
                    <has_text text=">821__F3PUY1__HMPREF9446_02555|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PUY1|UniRef50_A0A3E5DX68|411"/>
                    <has_text text=">821__A0A3E4KCH0__DXD33_19495|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A3E4KCH0|UniRef50_F3PP72|3582"/>
                </assert_contents>
            </output>
            <output name="diamond_aligned" ftype="tabular">
                <!-- Spot-check the diamond_aligned table: two UniRef90 identifiers and the
                     Bacteroides vulgatus read-name prefix must be present.
                     The read-name prefix was previously asserted twice in a row; the
                     redundant duplicate has been dropped (it verified nothing new). -->
                <assert_contents>
                    <has_text text="UniRef90_Z5XVM9|969"/>
                    <has_text text="s__Bacteroides_vulgatus_read"/>
                    <has_text text="UniRef90_Y0KEF3|618"/>
                </assert_contents>
            </output>
            <output name="diamond_unaligned" ftype="fasta">
                <!-- Spot-check FASTA header lines expected in the diamond_unaligned output -->
                <assert_contents>
                    <has_text text=">s__Bacteroides_dorei_read000001|100"/>
                    <has_text text=">s__Bacteroides_vulgatus_read006412|100"/>
                    <has_text text=">unclassified_read000867|100"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Test: raw gzipped FASTA input, "bypass_prescreen" workflow, BIOM output format.
                 Checks the three BIOM result files and the log. -->
            <conditional name="in">
                <!-- raw input: gzipped FASTA file -->
                <param name="selector" value="raw"/>
                <param name="input" value="demo.fasta.gz"/>
            </conditional>
            <conditional name="wf">
                <!-- bypass_prescreen -->
                <param name="selector" value="bypass_prescreen"/>
                <section name="nucleotide_search">
                    <conditional name="nucleotide_db">
                        <param name="selector" value="cached"/>
                        <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
                    </conditional>
                    <param name="nucleotide_identity_threshold" value="0"/>
                    <param name="nucleotide_subject_coverage_threshold" value="50"/>
                    <param name="nucleotide_query_coverage_threshold" value="90"/>
                </section>
                <section name="translated_search">
                    <conditional name="protein_db">
                        <param name="selector" value="cached"/>
                        <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
                    </conditional>
                    <param name="evalue" value="1"/>
                    <param name="translated_subject_coverage_threshold" value="50"/>
                    <param name="translated_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <!-- Biom -->
                <param name="output_basename" value="humann"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="biom"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp" value=""/>
            </section>
            <!-- BIOM outputs are checked by text fragments only (format marker, feature names, sample column name) -->
            <output name="gene_families_biom" ftype="biom1">
                <assert_contents>
                    <has_text text="biom-format"/>
                    <has_text text="UniRef90_A0A396BPQ7|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_text text="UniRef90_W8YTG4|unclassified"/>
                </assert_contents>
            </output>
            <output name="pathcoverage_biom" ftype="biom1">
                <assert_contents>
                    <has_text text="TREE"/>
                    <has_text text="format-url"/>
                    <has_text text="biom-format"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_text text="humann_Coverage"/>
                </assert_contents>
            </output>
            <output name="pathabundance_biom" ftype="biom1">
                <assert_contents>
                    <has_text text="TREE"/>
                    <has_text text="format-url"/>
                    <has_text text="biom-format"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_text text="humann_Abundance"/>
                </assert_contents>
            </output>
            <!-- Log must show the bowtie2-build step and both nucleotide and translated alignment summaries -->
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Running bowtie2-build ........"/>
                    <has_text text="Total bugs from nucleotide alignment: 2"/>
                    <has_text text="Total gene families from nucleotide alignment: "/>
                    <has_text text="Aligning to reference database: "/>
                    <has_text text="Total gene families after translated alignment: "/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Test: raw gzipped FASTA input, "bypass_taxonomic_profiling" workflow with a
                 supplied taxonomic profile, TSV output format.
                 Checks the three tabular result files and the log. -->
            <conditional name="in">
                <!-- raw input: gzipped FASTA file -->
                <param name="selector" value="raw"/>
                <param name="input" value="demo.fasta.gz"/>
            </conditional>
            <conditional name="wf">
                <!-- bypass_taxonomic_profiling -->
                <param name="selector" value="bypass_taxonomic_profiling"/>
                <param name="taxonomic_profile" value="demo-taxonomic-profile.tabular"/>
                <section name="nucleotide_search">
                    <conditional name="nucleotide_db">
                        <param name="selector" value="cached"/>
                        <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
                    </conditional>
                    <param name="nucleotide_identity_threshold" value="0"/>
                    <param name="nucleotide_subject_coverage_threshold" value="50"/>
                    <param name="nucleotide_query_coverage_threshold" value="90"/>
                </section>
                <section name="translated_search">
                    <conditional name="protein_db">
                        <param name="selector" value="cached"/>
                        <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
                    </conditional>
                    <param name="evalue" value="1"/>
                    <param name="translated_subject_coverage_threshold" value="50"/>
                    <param name="translated_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <param name="output_basename" value="humann"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="tsv"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp" value=""/>
            </section>
            <!-- Each tabular output must have exactly two columns and a header derived from the "humann" basename -->
            <output name="gene_families_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Abundance-RPKs"/>
                    <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
                    <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathcoverage_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Coverage"/>
                    <has_text text="UNINTEGRATED|unclassified"/>
                    <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathabundance_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Abundance"/>
                    <has_text text="UNINTEGRATED|unclassified"/>
                    <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>

                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <!-- Log must report the species found, then the nucleotide and translated alignment summaries -->
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Found g__Bacteroides.s__Bacteroides_vulgatus : "/>
                    <has_text text="Total species selected from prescreen: 2"/>
                    <has_text text="Total bugs from nucleotide alignment: 2"/>
                    <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: "/>
                    <has_text text="g__Bacteroides.s__Bacteroides_dorei: "/>
                    <has_text text="Total gene families from nucleotide alignment: "/>
                    <has_text text="Total bugs after translated alignment: 3"/>
                    <has_text text="Total gene families after translated alignment"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Test: pre-computed SAM mapping as input, "bypass_nucleotide_index" workflow,
                 TSV output format. Checks the three tabular result files and the log. -->
            <conditional name="in">
                <!-- mapping SAM file -->
                <param name="selector" value="mapping"/>
                <param name="input" value="demo.sam"/>
            </conditional>
            <conditional name="wf">
                <!-- bypass_nucleotide_index -->
                <param name="selector" value="bypass_nucleotide_index"/>
                <section name="nucleotide_search">
                    <conditional name="nucleotide_db">
                        <param name="selector" value="cached"/>
                        <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
                    </conditional>
                    <param name="nucleotide_identity_threshold" value="0"/>
                    <param name="nucleotide_subject_coverage_threshold" value="50"/>
                    <param name="nucleotide_query_coverage_threshold" value="90"/>
                </section>
                <section name="translated_search">
                    <conditional name="protein_db">
                        <param name="selector" value="cached"/>
                        <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
                    </conditional>
                    <param name="evalue" value="1"/>
                    <param name="translated_subject_coverage_threshold" value="50"/>
                    <param name="translated_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <param name="output_basename" value="humann"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="tsv"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp" value=""/>
            </section>
            <!-- Each tabular output must have exactly two columns -->
            <output name="gene_families_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei"/>
                    <has_text text="UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <!-- Pathway outputs are only checked for the UNMAPPED and UNINTEGRATED rows -->
            <output name="pathcoverage_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="UNMAPPED"/>
                    <has_text text="UNINTEGRATED"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathabundance_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="UNMAPPED"/>
                    <has_text text="UNINTEGRATED"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <!-- Log must show that the SAM mapping was processed and quantification steps ran -->
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Process the sam mapping results"/>
                    <has_text text="Computing gene families"/>
                    <has_text text="Computing pathways abundance and coverage"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Test: raw gzipped FASTQ input, "bypass_nucleotide_search" workflow (only the
                 translated_search section is configured), TSV output format.
                 Checks the three tabular result files and the log. -->
            <conditional name="in">
                <!-- raw input: gzipped FASTQ file -->
                <param name="selector" value="raw"/>
                <param name="input" value="demo.fastq.gz"/>
            </conditional>
            <conditional name="wf">
                <!-- bypass_nucleotide_search -->
                <param name="selector" value="bypass_nucleotide_search"/>
                <section name="translated_search">
                    <conditional name="protein_db">
                        <param name="selector" value="cached"/>
                        <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
                    </conditional>
                    <param name="evalue" value="1"/>
                    <param name="translated_subject_coverage_threshold" value="50"/>
                    <param name="translated_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <param name="output_basename" value="humann"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="tsv"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp" value=""/>
            </section>
            <!-- Each tabular output must have exactly two columns; asserted features are all "unclassified" -->
            <output name="gene_families_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Abundance-RPKs"/>
                    <has_text text="UniRef90_Q9ZUH4|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathcoverage_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Coverage"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathabundance_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="humann_Abundance"/>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <!-- Log must report a single bug ("unclassified") after the translated alignment -->
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Total bugs after translated alignment: 1"/>
                    <has_text text="unclassified: "/>
                    <has_text text="Unaligned reads after translated alignment: "/>
                    <has_text text="Total gene families"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <!-- Test: raw gzipped FASTQ input, "bypass_translated_search" workflow (prescreen and
                 nucleotide_search sections configured), TSV output format with a custom
                 output basename ("newname"). Checks the three tabular result files and the log. -->
            <conditional name="in">
                <!-- raw input: gzipped FASTQ file -->
                <param name="selector" value="raw"/>
                <param name="input" value="demo.fastq.gz"/>
            </conditional>
            <conditional name="wf">
                <!-- bypass_translated_search -->
                <param name="selector" value="bypass_translated_search"/>
                <section name="prescreen">
                    <conditional name="metaphlan_db">
                        <param name="selector" value="cached"/>
                        <param name="cached_db" value="metaphlan-demo-db-20210421"/>
                    </conditional>
                    <param name="prescreen_threshold" value="0.01"/>
                </section>
                <section name="nucleotide_search">
                    <conditional name="nucleotide_db">
                        <param name="selector" value="cached"/>
                        <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
                    </conditional>
                    <param name="nucleotide_identity_threshold" value="0"/>
                    <param name="nucleotide_subject_coverage_threshold" value="50"/>
                    <param name="nucleotide_query_coverage_threshold" value="90"/>
                </section>
            </conditional>
            <section name="g_p_quant">
                <param name="gap_fill" value="true"/>
                <param name="minpath" value="true"/>
                <param name="pathways" value="metacyc"/>
                <param name="xipe" value="false"/>
                <param name="annotation_gene_index" value="3"/>
            </section>
            <section name="out">
                <!-- non-default basename: the output headers asserted below must start with "newname" -->
                <param name="output_basename" value="newname"/>
                <param name="log_level" value="DEBUG"/>
                <param name="output_format" value="tsv"/>
                <param name="output_max_decimals" value="10"/>
                <param name="remove_column_description_output" value="false"/>
                <param name="remove_statified_output" value="false"/>
                <param name="intermediate_temp" value=""/>
            </section>
            <!-- Each tabular output must have exactly two columns -->
            <output name="gene_families_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="newname_Abundance-RPKs"/>
                    <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathcoverage_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="newname_Coverage"/>
                    <has_text text="UNMAPPED"/>
                    <has_text text="UNINTEGRATED"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <output name="pathabundance_tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="newname_Abundance"/>
                    <has_text text="UNMAPPED"/>
                    <has_text text="UNINTEGRATED"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
            <!-- Log is checked for exact hit and gene family counts and for the bypass message -->
            <output name="log" ftype="txt">
                <assert_contents>
                    <has_text text="Total bugs from nucleotide alignment: 2"/>
                    <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: 1195 hits"/>
                    <has_text text="g__Bacteroides.s__Bacteroides_dorei: 1260 hits"/>
                    <has_text text="Total gene families from nucleotide alignment: 545"/>
                    <has_text text="Bypass translated search"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This tool corresponds to the main tool in the HUMAnN pipeline:


1. Taxonomic prescreen

    Reads are mapped (with MetaPhlAn) to clade-specific marker genes to rapidly identify community species

2. Pangenome search (nucleotide search)

    Reads are mapped (with Bowtie2) to pangenomes of identified species

3. Translated search

    Unclassified reads are aligned to a comprehensive and non-redundant protein database

4. Gene family and pathway quantification

    - Gene abundance estimation

        Mapping results are processed to estimate per-species and community total gene family abundance, weighting by

        - alignment quality
        - gene length
        - gene coverage

    - Per-species and community-level metabolic network reconstruction

        Genes are mapped to metabolic reactions to identify a parsimonious set of pathways that explains each species' observed reactions

        Pathway abundance and coverage are quantified by:

        1. optimizing over alternative subpathways
        2. imputing abundance for conspicuously depleted reactions


Inputs
======

HUMAnN can start from a few different types of input data each in a few different types of formats:

- Quality-controlled shotgun sequencing reads

    This is the most common starting point: a metagenome (DNA reads) or a metatranscriptome (RNA reads)

- Pre-computed mappings of reads to database sequences

- Pre-computed (typically gene) abundance tables


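HUMAnN uses 3 reference databases: a database of clade-specific marker genes (prescreen), a nucleotide database (pangenome search) and a protein database (translated search).
Locally cached databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload.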

Outputs
=======

HUMAnN creates three output files:

- Gene families and their abundance
- Pathways and their abundance
- Pathways and their coverage

Ten intermediate temp output files can also be retrieved.

    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Wed, 19 May 2021 17:03:51 +0000
parents	65c80ca30373
children	2cd76b089570