view humann2.xml @ 0:3d6f37e7e3a8 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit 7aadbbdef6da644837f62ea428cb859eeff34f04-dirty
author bebatut
date Thu, 26 May 2016 10:20:59 -0400
parents
children bcd414bb721b
line wrap: on
line source

<tool id="humann2" name="HUMAnN2" version="0.6.1">
    <description>to profile presence/absence and abundance of microbial pathways and gene families</description>

    <!-- Pull in the macro definitions shared by the HUMAnN2 wrappers. -->
    <macros>
        <import>humann2_macros.xml</import>
    </macros>

    <!-- Expand the "requirements" and "stdio" macros; presumably both are
         defined in humann2_macros.xml (not visible from this file). -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <!-- Command whose output is used to report the installed humann2 version. -->
    <version_command>
<![CDATA[
    humann2 --version
]]>
    </version_command>

    <command><![CDATA[
        ## Every step is chained with a leading "&&" emitted by the step itself,
        ## so an optional step that is skipped never leaves a dangling "&& &&"
        ## and a failing step stops the whole job.

        ## Directory receiving a nucleotide database supplied from the history.
        mkdir nucleotide_db

        #if $nucleotide_db.nucleotide_db_selector == "history"
            ## Copy each collection element into the database directory under its own name.
            #for $file in $nucleotide_db.history_nucleotide_db
                && cp '$file' 'nucleotide_db/${file.name}'
            #end for
        #end if

        ## Directory receiving the DIAMOND index built from a history protein database.
        && mkdir protein_db

        #if $protein_db.protein_db_selector == "history"
            && diamond makedb
                --in '$protein_db.history_protein_db'
                --db protein_db/protein_db
        #end if

        && humann2
            -i '$input_file'

            ## Options forwarded to MetaPhlAn2; extended below only when no
            ## custom taxonomic profile is supplied.
            #set $metaphlan_option = "-t rel_ab"
            #if $taxonomic_profile.taxonomic_profile_test == "true"
                --taxonomic-profile '$taxonomic_profile.taxonomic_profile_file'
            #else

                #if $taxonomic_profile.mpa_pkl.mpa_pkl_selector == "cached"
                    ## Map the selected data table entry (column 0) to column 2 of the same row.
                    #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data])
                    #set $mpa_pkl_db = $taxonomic_profile.mpa_pkl.cached_mpa_pkl.value
                    #set $metaphlan_option += " --mpa_pkl " + str($mpa_pkl_table[$mpa_pkl_db])
                #else
                    ## str() is required: the dataset wrapper cannot be concatenated to a string directly.
                    #set $metaphlan_option += " --mpa_pkl " + str($taxonomic_profile.mpa_pkl.history_mpa_pkl)
                #end if

                #if $taxonomic_profile.bowtie2db.bowtie2db_selector == "cached"
                    #set $bowtie2_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.bowtie2db.cached_bowtie2db.input.options.tool_data_table.data])
                    #set $bowtie2db_choice = $taxonomic_profile.bowtie2db.cached_bowtie2db.value
                    #set $metaphlan_option += " --bowtie2db " + str($bowtie2_table[$bowtie2db_choice])
                #else
                    #set $metaphlan_option += " --bowtie2db " + str($taxonomic_profile.bowtie2db.history_bowtie2db)
                #end if

            #end if

            --metaphlan-options="$metaphlan_option"

            --evalue $e_value
            --identity-threshold $identity
            --prescreen-threshold $prescreen

            --pathways $pathways

            #if $nucleotide_db.nucleotide_db_selector == "cached"
                --nucleotide-database \${HUMANN2_DIR}/databases/chocophlan
            #else
                --nucleotide-database nucleotide_db
            #end if

            #if $protein_db.protein_db_selector == "cached"
                --protein-database \${HUMANN2_DIR}/databases/uniref
            #else
                --protein-database protein_db
            #end if

            ## GALAXY_SLOTS is resolved by the shell at run time; falls back to 4.
            --threads \${GALAXY_SLOTS:-4}

            --xipe $xipe
            --minpath $minpath
            --pick-frames $pick_frames

            ## Results are written to ./output as humann2_*; the outputs section
            ## collects them from there.
            -o "output"
            --output-format $output_format
            --output-max-decimals $output_max_dec
            --output-basename "humann2"
            $remove_statified_output
    ]]></command>

    <inputs>
        <!-- File handed to humann2 through its -i option. -->
        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom"
            label="Input sequence file" help=""/>

        <!-- Either reuse an existing taxonomic profile, or choose the metadata
             and BowTie2 database that are forwarded to MetaPhlAn2. -->
        <conditional name="taxonomic_profile">
            <param name="taxonomic_profile_test" type="select" label="Use a custom taxonomic profile?" help="The file must have been created by MetaPhlAn2">
                <option value="true">Yes</option>
                <option value="false" selected="true">No</option>
            </param>
            <when value="true">
                <!-- Label kept on a single line: a line break inside an
                     attribute value becomes part of the label. -->
                <param name="taxonomic_profile_file" type="data" format="tabular,txt" label="Taxonomic profile file" help=""/>
            </when>
            <when value="false">
                <conditional name="mpa_pkl">
                    <param name="mpa_pkl_selector" type="select" label="Metadata for MetaPhlAn2" help="">
                        <option value="cached" selected="true">Locally cached</option>
                        <option value="history">From history</option>
                    </param>

                    <when value="cached">
                        <param name="cached_mpa_pkl" label="Cached metadata" type="select">
                            <options from_data_table="metaphlan2_metadata"/>
                        </param>
                    </when>
                    <when value="history">
                        <!-- NOTE(review): format is declared as fasta although this is the
                             MetaPhlAn2 metadata file; confirm the intended datatype. -->
                        <param name="history_mpa_pkl" type="data" format="fasta" label="Metadata from history"/>
                    </when>
                </conditional>

                <conditional name="bowtie2db">
                    <param name="bowtie2db_selector" type="select" label="BowTie2 database for MetaPhlAn2" help="">
                        <option value="cached" selected="true">Locally cached</option>
                        <option value="history">From history</option>
                    </param>

                    <when value="cached">
                        <param name="cached_bowtie2db" label="Cached BowTie2 database" type="select">
                            <options from_data_table="metaphlan2_bowtie_db"/>
                        </param>
                    </when>
                    <when value="history">
                        <param name="history_bowtie2db" type="data" format="fasta" label="BowTie2 database from history"/>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Nucleotide database: the cached one, or a list collection from the
             history whose elements are copied into the job directory. -->
        <conditional name="nucleotide_db">
            <param name="nucleotide_db_selector" type="select" label="Nucleotide database" help="">
                <option value="cached" selected="true">Locally cached</option>
                <option value="history">From history (as collection)</option>
            </param>

            <when value="cached"/>
            <when value="history">
                <param name="history_nucleotide_db" type="data_collection" collection_type="list" format="fasta" label="Nucleotide database from history" help="Each file must be named: ^[g__].[s__]"/>
            </when>
        </conditional>

        <!-- Protein database: the cached one, or a FASTA file from the history
             that is indexed with "diamond makedb" before humann2 runs. -->
        <conditional name="protein_db">
            <param name="protein_db_selector" type="select" label="Protein database" help="">
                <option value="cached" selected="true">Locally cached</option>
                <option value="history">From history</option>
            </param>

            <when value="cached"/>
            <when value="history">
                <param name="history_protein_db" type="data" format="fasta" label="Protein database from history" help=""/>
            </when>
        </conditional>

        <!-- Search thresholds. -->
        <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold to use with the translated search" help="(--evalue)"/>
        <param name="identity" type="integer" min="0" max="100" value="40" label="Identity threshold for alignments" help="(--identity-threshold)"/>
        <param name="prescreen" type="float" min="0" max="1" value="0.01" label="Minimum percentage of reads matching a species" help="(--prescreen-threshold)"/>

        <param name="pathways" type="select" label="Database to use for pathway computations" help="(--pathways)">
            <option value="metacyc" selected="true">MetaCyc</option>
            <option value="unipathway">UniPathway</option>
        </param>

        <!-- These three switches are passed to humann2 as the literal values on / off. -->
        <param name="xipe" type="boolean" checked="false" truevalue="on" falsevalue="off" label="Use xipe computation?" help="(--xipe)"/>
        <param name="minpath" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use minpath computation?" help="(--minpath)"/>
        <param name="pick_frames" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use pick frames computation?" help="(--pick-frames)"/>

        <!-- The selected format also decides which output datasets are created. -->
        <param name="output_format" type="select" label="Format of the output files" help="(--output-format)">
            <option value="tsv" selected="true">TSV</option>
            <option value="biom">BIOM</option>
        </param>

        <param name="output_max_dec" type="integer" min="0" max="100" value="10" label="Number of decimals to output" help="(--output-max-decimals)"/>
        <!-- The parameter name keeps its original spelling ("statified") because the
             command template refers to it by that name. -->
        <param name="remove_statified_output" type="boolean" checked="false" truevalue="--remove-stratified-output" falsevalue="" label="Remove stratification from output?" help="(--remove-stratified-output)"/>
    </inputs>

    <outputs>
        <!-- Each result is declared once per selectable format; the filter keeps
             only the dataset whose format equals the chosen output_format. -->

        <!-- Gene families -->
        <data name="gene_families_tsv"
              format="tsv"
              from_work_dir="output/humann2_genefamilies.tsv"
              label="${tool.name} on ${on_string}: Gene families and their abundance">
            <filter>output_format == "tsv"</filter>
        </data>
        <data name="gene_families_biom"
              format="biom"
              from_work_dir="output/humann2_genefamilies.biom"
              label="${tool.name} on ${on_string}: Gene families and their abundance">
            <filter>output_format == "biom"</filter>
        </data>

        <!-- Pathway coverage -->
        <data name="pathcoverage_tsv"
              format="tsv"
              from_work_dir="output/humann2_pathcoverage.tsv"
              label="${tool.name} on ${on_string}: Pathways and their coverage">
            <filter>output_format == "tsv"</filter>
        </data>
        <data name="pathcoverage_biom"
              format="biom"
              from_work_dir="output/humann2_pathcoverage.biom"
              label="${tool.name} on ${on_string}: Pathways and their coverage">
            <filter>output_format == "biom"</filter>
        </data>

        <!-- Pathway abundance -->
        <data name="pathabundance_tsv"
              format="tsv"
              from_work_dir="output/humann2_pathabundance.tsv"
              label="${tool.name} on ${on_string}: Pathways and their abundance">
            <filter>output_format == "tsv"</filter>
        </data>
        <data name="pathabundance_biom"
              format="biom"
              from_work_dir="output/humann2_pathabundance.biom"
              label="${tool.name} on ${on_string}: Pathways and their abundance">
            <filter>output_format == "biom"</filter>
        </data>
    </outputs>

    <tests>
        <!-- Single scenario: custom taxonomic profile, with both the nucleotide
             and the protein database taken from the history, TSV output. -->
        <test>
            <!-- Input reads and precomputed taxonomic profile -->
            <param name="input_file" value="input_sequences.fasta"/>
            <param name="taxonomic_profile_test" value="true"/>
            <param name="taxonomic_profile_file" value="taxonomic_profile.tabular"/>

            <!-- Nucleotide database supplied as a list collection -->
            <param name="nucleotide_db_selector" value="history"/>
            <param name="history_nucleotide_db">
                <collection type="list">
                    <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1"
                             ftype="fasta"
                             value="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz"/>
                    <element name="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1"
                             ftype="fasta"
                             value="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz"/>
                </collection>
            </param>

            <!-- Protein database supplied as a FASTA file -->
            <param name="protein_db_selector" value="history"/>
            <param name="history_protein_db" value="reduced_uniref50.fasta"/>

            <!-- Search and computation settings -->
            <param name="e_value" value="1"/>
            <param name="identity" value="40"/>
            <param name="prescreen" value="0.01"/>
            <param name="pathways" value="metacyc"/>
            <param name="xipe" value="false"/>
            <param name="minpath" value="true"/>
            <param name="pick_frames" value="true"/>

            <!-- Output settings -->
            <param name="output_format" value="tsv"/>
            <param name="output_max_dec" value="10"/>

            <!-- Expected TSV results -->
            <output name="gene_families_tsv" file="expected_gene_family_abundance.tsv"/>
            <output name="pathcoverage_tsv" file="expected_pathway_coverage.tsv"/>
            <output name="pathabundance_tsv" file="expected_pathway_abundance.tsv"/>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.

This tool corresponds to the main tool in HUMAnN2 pipeline.

**Inputs**

The input is a single file corresponding either to a filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), an alignment file (sam, bam or blastm8 format) or a gene table file (tsv or biom format).

A file with a taxonomic profile (obtained with MetaPhlAn2) can also be provided to avoid the first step of taxonomic profiling, which is needed to select pangenomes in the protein database.

HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded before they can be used (using the dedicated tool). Custom databases can also be used after upload. A custom nucleotide database has to be provided as a dataset collection.

**Outputs**

HUMAnN creates three output files:

 - A file with gene families and their abundance
 - A file with pathways and their abundance
 - A file with pathways and their coverage

    ]]></help>

    <expand macro="citations"/>
</tool>