view metabat2.xml @ 3:eb50e7eca3dc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metabat2/ commit 3a3dfd676076438bea1518eb731790edfb4da3c0
author iuc
date Thu, 09 Nov 2023 12:23:52 +0000
parents 708abf08a626
children
line wrap: on
line source

<tool id="metabat2" name="MetaBAT2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>metagenome binning</description>
    <!-- Shared definitions are pulled in from macros.xml (not visible here).
         The biotools and requirements macros expanded below, and presumably the
         @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens used on the tool
         element, are expected to be defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Bins are written under bins/ with the prefix "bin" so the outputs section
## can collect them from that directory.
mkdir bins &&
metabat2
    --inFile '$inFile'
    --outFile 'bins/bin'
#if str($advanced.base_coverage_depth_cond.base_coverage_depth) == 'yes'
## The depth matrix is always handed over through --abdFile. --cvExt is a
## value-less switch telling MetaBAT2 that the matrix has no variance columns,
## so a third-party depth file is passed via --abdFile with --cvExt added.
## When both files are supplied the abdFile dataset takes precedence.
    #if $advanced.base_coverage_depth_cond.abdFile
    --abdFile '$advanced.base_coverage_depth_cond.abdFile'
    #else if $advanced.base_coverage_depth_cond.cvExt
    --abdFile '$advanced.base_coverage_depth_cond.cvExt'
    --cvExt
    #end if
#end if
    --minContig $advanced.minContig
    --maxP $advanced.maxP
    --minS $advanced.minS
    --maxEdges $advanced.maxEdges
    --pTNF $advanced.pTNF
    $advanced.noAdd
    --minCV $advanced.minCV
    --minCVSum $advanced.minCVSum
    --seed $advanced.seed
    --minClsSize $out.minClsSize
    --numThreads \${GALAXY_SLOTS:-4}
    $out.onlyLabel
## When the membership matrix is requested, per-bin output is switched off;
## the outputs section then picks the matrix up from bins/bin.
#if $out.saveCls
    $out.saveCls
    --noBinOut
#end if
## extra_outputs is an optional multi-select and may be empty, hence the
## truthiness guard before each membership test.
#if $out.extra_outputs and 'unbinned' in $out.extra_outputs
    --unbinned
#end if
## stdout is captured so it can be exposed as the optional log dataset.
    > process_log.txt
#if $out.extra_outputs and 'log' in $out.extra_outputs
    && mv process_log.txt '$process_log'
#end if
    ]]></command>
    <inputs>
        <!-- Contigs to bin; plain or gzip-compressed FASTA. -->
        <param argument="--inFile" type="data" format="fasta,fasta.gz" label="Fasta file containing contigs"/>
        <section name="advanced" title="Advanced options">
            <!-- Optional coverage depth matrix. Both datasets are optional; the
                 command template uses abdFile when it is set and only falls back
                 to cvExt otherwise. -->
            <conditional name="base_coverage_depth_cond">
                <param name="base_coverage_depth" type="select" label="Use a base coverage depth file?">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param argument="--abdFile"
                           type="data"
                           format="tabular"
                           optional="true"
                           label="Tabular depth matrix file having mean and variance of base coverage depth"
                           help="Generated by the Calculate contig depths for MetaBAT2 tool"/>
                    <param argument="--cvExt"
                           type="data"
                           format="tabular"
                           optional="true"
                           label="Base coverage depth file without variance"
                           help="Generated by a tool - not the Calculate contig depths for MetaBAT2 tool"/>
                </when>
            </conditional>
            <!-- Binning thresholds; each value is forwarded unchanged to the
                 option of the same name. -->
            <param argument="--minContig" type="integer" min="1500" value="1500" label="Minimum size of a contig for binning"/>
            <param argument="--maxP"
                   type="integer"
                   min="1"
                   max="100"
                   value="95"
                   label="Percentage of good contigs considered for binning decided by connection among contigs"
                   help="The greater, the more sensitive"/>
            <param argument="--minS"
                   type="integer"
                   min="1"
                   max="99"
                   value="60"
                   label="Minimum score of an edge for binning"
                   help="The greater, the more specific"/>
            <param argument="--maxEdges"
                   type="integer"
                   value="200"
                   label="Maximum number of edges per node"
                   help="The greater, the more sensitive"/>
            <param argument="--pTNF"
                   type="integer"
                   value="0"
                   label="TNF probability cutoff for building TNF graph"
                   help="Use it to skip the preparation step (0:auto)"/>
            <param argument="--noAdd"
                   type="boolean"
                   truevalue="--noAdd"
                   falsevalue=""
                   checked="false"
                   label="Turn off additional binning for lost or small contigs?"/>
            <param argument="--minCV" type="float" value="1.0" label="Minimum mean coverage of a contig in each library for binning"/>
            <param argument="--minCVSum"
                   type="float"
                   value="1.0"
                   label="Minimum total effective mean coverage of a contig for binning"
                   help="Sum of depth over minCV"/>
            <param argument="--seed" type="integer" value="0" label="For exact reproducibility." help="Zero value will use random seed"/>
        </section>
        <section name="out" title="Output options">
            <param argument="--minClsSize" type="integer" value="200000" label="Minimum size of a bin as the output"/>
            <!-- onlyLabel and saveCls decide which primary output is produced;
                 see the filters in the outputs section. -->
            <param argument="--onlyLabel"
                   type="boolean"
                   truevalue="--onlyLabel"
                   falsevalue=""
                   checked="false"
                   label="Output only sequence labels as a list in a column without sequences?"/>
            <param argument="--saveCls"
                   type="boolean"
                   truevalue="--saveCls"
                   falsevalue=""
                   checked="false"
                   label="Save cluster memberships as a matrix format?"/>
            <!-- Optional multi-select: nothing is selected by default, so
                 consumers must tolerate an empty value. -->
            <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs">
                <option value="lowDepth">Fasta file containing low depth bins (if no matrix format and label selected)</option>
                <option value="tooShort">Fasta file containing too short bins (if no matrix format and label selected)</option>
                <option value="unbinned">Fasta file containing unbinned contigs (if no matrix format and label selected)</option>
                <option value="log">Process log file</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Exactly one of the three primary outputs below is produced, chosen
             by the saveCls / onlyLabel switches. If both switches are on, none
             of the three filters passes. -->
        <!-- Default: one FASTA per bin, discovered as bins/bin.N.fa. -->
        <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bin sequences">
            <filter>not out['saveCls'] and not out['onlyLabel']</filter>
            <discover_datasets pattern="bin\.(?P&lt;designation&gt;\d*)\.fa" format="fasta" directory="bins"/>
        </collection>
        <!-- saveCls: a single membership matrix taken from bins/bin. -->
        <data name="bin_saveCls" format="tabular" from_work_dir="bins/bin" label="${tool.name} on ${on_string}: Bins with cluster memberships">
            <filter>out['saveCls'] and not out['onlyLabel']</filter>
        </data>
        <!-- onlyLabel: one label list per bin, discovered as bins/bin.N. -->
        <collection name="bin_onlyLabel" type="list" label="${tool.name} on ${on_string}: Bin labels">
            <filter>not out['saveCls'] and out['onlyLabel']</filter>
            <discover_datasets pattern="bin\.(?P&lt;designation&gt;\d*)" format="tabular" directory="bins"/>
        </collection>
        <!-- Extra outputs. extra_outputs is an optional multi-select and can be
             None when nothing is selected; the truthiness check keeps the
             membership test from raising, which would otherwise leave the
             output unfiltered. -->
        <data name="lowDepth" format="fasta" from_work_dir="bins/bin.lowDepth.fa" label="${tool.name} on ${on_string}: Low depth bins">
            <filter>not out['saveCls'] and not out['onlyLabel'] and out['extra_outputs'] and 'lowDepth' in out['extra_outputs']</filter>
        </data>
        <data name="tooShort" format="fasta" from_work_dir="bins/bin.tooShort.fa" label="${tool.name} on ${on_string}: Too short bins">
            <filter>not out['saveCls'] and not out['onlyLabel'] and out['extra_outputs'] and 'tooShort' in out['extra_outputs']</filter>
        </data>
        <data name="unbinned" format="fasta" from_work_dir="bins/bin.unbinned.fa" label="${tool.name} on ${on_string}: Unbinned sequences">
            <filter>not out['saveCls'] and not out['onlyLabel'] and out['extra_outputs'] and 'unbinned' in out['extra_outputs']</filter>
        </data>
        <!-- The log is independent of the binning mode. -->
        <data name="process_log" format="txt" label="${tool.name} on ${on_string}: Process log">
            <filter>out['extra_outputs'] and 'log' in out['extra_outputs']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: default binning without a depth file, fixed seed, all four
             extra outputs requested. Expects the bins collection (two elements
             with exact sizes and known sequence headers) plus the lowDepth,
             tooShort, unbinned and log datasets. The empty has_text assertions
             on lowDepth and tooShort match any content, so those two outputs
             are effectively only checked for existence. -->
        <test expect_num_outputs="5">
            <param name="inFile" value="input0.fasta.gz" ftype="fasta.gz"/>
            <section name="advanced">
                <conditional name="base_coverage_depth_cond">
                    <param name="base_coverage_depth" value="no"/>
                </conditional>
                <param name="seed" value="345678"/>
            </section>
            <section name="out">
                <param name="extra_outputs" value="lowDepth,tooShort,unbinned,log"/>
            </section>
            <output_collection name="bins" type="list" count="2">
                <element name="1" ftype="fasta">
                    <assert_contents>
                        <has_size value="334554"/>
                        <has_text text=">MW127517.1"/>
                        <has_text text=">MW127521.1"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="fasta">
                    <assert_contents>
                        <has_size value="577866"/>
                        <has_text text=">MT810119.1"/>
                        <has_text text=">MW130866.1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="lowDepth" ftype="fasta">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="tooShort" ftype="fasta">
                <assert_contents>
                    <has_text text=""/>
                </assert_contents>
            </output>
            <output name="unbinned" ftype="fasta">
                <assert_contents>
                    <has_text text=">NC_045512.2"/>
                    <has_text text=">MW130872.1"/>
                </assert_contents>
            </output>
            <output name="process_log" ftype="txt">
                <assert_contents>
                    <has_text text="MetaBAT"/>
                    <has_text text="2 bins (897090 bases in total) formed"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: binning with a depth matrix supplied through abdFile and no
             extra outputs. Only the bins collection is expected, and it is
             expected to contain no elements for this input. -->
        <test expect_num_outputs="1">
            <param name="inFile" value="input0.fasta.gz" ftype="fasta.gz"/>
            <section name="advanced">
                <conditional name="base_coverage_depth_cond">
                    <param name="base_coverage_depth" value="yes"/>
                    <param name="abdFile" value="input_depth1.tabular" ftype="tabular"/>
                </conditional>
                <param name="seed" value="345678"/>
            </section>
            <section name="out">
                <param name="onlyLabel" value="false"/>
                <param name="saveCls" value="false"/>
                <param name="extra_outputs" value=""/>
            </section>
            <output_collection name="bins" type="list" count="0"/>
        </test>
        <!-- Test 3: saveCls mode. The single expected output is the tabular
             membership matrix, checked for three contig identifiers. -->
        <test expect_num_outputs="1">
            <param name="inFile" value="input0.fasta.gz" ftype="fasta.gz"/>
            <section name="advanced">
                <conditional name="base_coverage_depth_cond">
                    <param name="base_coverage_depth" value="no"/>
                </conditional>
                <param name="seed" value="345678"/>
            </section>
            <section name="out">
                <param name="onlyLabel" value="false"/>
                <param name="saveCls" value="true"/>
                <param name="extra_outputs" value=""/>
            </section>
            <output name="bin_saveCls" ftype="tabular">
                <assert_contents>
                    <has_text text="NC_045512.2"/>
                    <has_text text="MT810119.1"/>
                    <has_text text="MW130860.1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: onlyLabel mode. The single expected output is the
             bin_onlyLabel collection with two tabular elements, each checked
             for two contig identifiers. -->
        <test expect_num_outputs="1">
            <param name="inFile" value="input0.fasta.gz" ftype="fasta.gz"/>
            <section name="advanced">
                <conditional name="base_coverage_depth_cond">
                    <param name="base_coverage_depth" value="no"/>
                </conditional>
                <param name="seed" value="345678"/>
            </section>
            <section name="out">
                <param name="onlyLabel" value="true"/>
                <param name="saveCls" value="false"/>
                <param name="extra_outputs" value=""/>
            </section>
            <output_collection name="bin_onlyLabel" type="list" count="2">
                <element name="1" ftype="tabular">
                    <assert_contents>
                        <has_text text="MW127518.1"/>
                        <has_text text="MW127521.1"/>
                    </assert_contents>
                </element>
                <element name="2" ftype="tabular">
                    <assert_contents>
                        <has_text text="MT810119.1"/>
                        <has_text text="MW130866.1"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

MetaBAT2 (Metagenome Binning based on Abundance and Tetranucleotide frequency) is an automated metagenome binning
software that integrates empirical probabilistic distances of genome abundance and tetranucleotide frequency.  The
tool accepts a fasta file containing contigs and produces a collection (i.e., bins) of fasta files.
        
MetaBAT2 includes optimizations to MetaBAT.  It requires virtually no parameter optimization. Default parameter
values are more reliable to use in most cases since MetaBAT2 adapts to the given data to find the best parameter.
Some parameter settings are still available for advanced users, helping to manage some exceptional cases by changing
the amount of data used for the analysis.

**More information**

https://bitbucket.org/berkeleylab/metabat/src/master/

**Options**

 * **Use base coverage depth file** - optionally select a base coverage depth file that was either generated by the Calculate contig depths for MetaBAT2 tool or another 3rd party tool.
 
    ]]></help>
    <expand macro="citations"/>
</tool>