view miniasm.xml @ 0:b3ec520dc2ed draft

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/miniasm commit 49a3fba768d496b2b010947f61786cdcf0535e7d
author erasmus-medical-center
date Thu, 12 Jul 2018 05:12:33 -0400
parents
children f0a2f5a9b637
line wrap: on
line source

<tool id="miniasm" name="miniasm" version="0.2.0">
    <!-- Galaxy wrapper around the miniasm assembler: takes an all-vs-all PAF
         mapping (plus, optionally, the reads themselves) and writes the
         assembly graph that miniasm prints on stdout to the "gfa" output. -->
    <description>Ultrafast de novo assembly for long noisy reads</description>
    <requirements>
        <requirement type="package" version="0.2">miniasm</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
miniasm
## readFile is optional: only pass the flag when a dataset was selected,
## otherwise Cheetah would render the literal string 'None' as the file name.
#if $readFile
    -f '$readFile'
#end if
-m '$preselection.minMatch'
-i '$preselection.minIden'
-s '$preselection.minSpan'
-c '$preselection.minCov'
-o '$overlapping.minOvlp'
-h '$overlapping.maxHang'
-I '$overlapping.intThres'
-g '$graph.maxGapDiff'
-d '$graph.maxBubDist'
-e '$graph.minUtgSize'
-n '$graph.nRounds'
-F '$graph.finalDropRatio'
'$paf' > '$gfa'
    ]]></command>
    <inputs>
        <param name="readFile" argument="-f" type="data" format="fastq,fasta" optional="true"
               label="Sequence Reads"
               help="Optional. Reads used to build the unitig sequences; when omitted only the graph structure is expected in the output" />
        <param name="paf" type="data" format="interval" label="PAF file" help="Pairwise mapping format" />
        <!-- Each parameter below keeps its original name (used by the command
             template and the tests); "argument" carries the real miniasm flag
             so Galaxy shows it next to the help text. -->
        <section name="preselection" title="Preselection options">
            <param name="minMatch" argument="-m" type="integer" value="100"
                   label="Drop mappings having less than this number of matching bases"
                   help="column 10 in PAF" />
            <param name="minIden" argument="-i" type="float" value="0.05"
                   label="During read filtering, ignore mappings with col10/col11 below this value"
                   help="Ignored mappings are still used for read overlaps" />
            <param name="minSpan" argument="-s" type="integer" value="1000"
                   label="Drop mappings shorter than this number bp"
                   help="This option also affects the second round of read filtering and minimal overlap length" />
            <param name="minCov" argument="-c" type="integer" value="3"
                   label="Minimal coverage by other reads"
                   help="In the first round of filtering, miniasm finds the longest region covered by this number or more reads. In the second round, it in addition requires each remaining base to be covered by this many bases at least minSpan/2 from the ends of other reads" />
        </section>
        <section name="overlapping" title="Overlapping options">
            <param name="minOvlp" argument="-o" type="integer" value="1000"
                   label="Minimal overlap length [same as minSpan]"
                   help="This wrapper always passes the value explicitly, so it does not follow minSpan automatically; adjust both when changing minSpan" />
            <param name="maxHang" argument="-h" type="integer" value="1000"
                   label="Maximum overhang length"
                   help="An overhang is an unmapped region that should be mapped given a true overlap or true containment. If the overhang is too long, the mapping is considered an internal match and will be ignored" />
            <!-- Default is 0.8, matching miniasm's own default for this flag
                 (the previous wrapper default of 0.08 dropped a digit). -->
            <param name="intThres" argument="-I" type="float" value="0.8"
                   label="Minimal ratio of mapping length to mapping+overhang length for a mapping considered a containment or an overlap"
                   help="This option has a similar role to maxHang, except that it controls the ratio, not length" />
        </section>
        <section name="graph" title="Graph layout options">
            <param name="maxGapDiff" argument="-g" type="integer" value="1000"
                   label="Maximal gap differences between two reads in a mapping"
                   help="This parameter is only used for transitive reduction" />
            <param name="maxBubDist" argument="-d" type="integer" value="50000"
                   label="Maximal probing distance for bubble popping"
                   help="Bubbles longer than this value will not be popped" />
            <param name="minUtgSize" argument="-e" type="integer" value="4"
                   label="A unitig is considered small if it is composed of less than this many reads"
                   help="Miniasm may try to remove small unitigs at various steps" />
            <param name="nRounds" argument="-n" type="integer" value="3"
                   label="Rounds of short overlap removal" />
            <param name="finalDropRatio" argument="-F" type="float" value="0.8"
                   label="Overlap drop ratio threshold after short unitig removal" />
        </section>
    </inputs>
    <outputs>
        <data name="gfa" format="tabular" label="${tool.name} on ${on_string}: Assembly Graph" />
    </outputs>
    <tests>
        <!-- Default settings with reads supplied. -->
        <test>
            <param name="readFile" value="mini_reads.fq"/>
            <param name="paf" value="mini_reads.paf.gz"/>
            <output name="gfa" file="mini_reads.gfa"/>
        </test>
        <!-- Every tunable moved slightly off its default. -->
        <test>
            <param name="readFile" value="mini_reads.fq"/>
            <param name="minMatch" value="99"/>
            <param name="minIden" value="0.04"/>
            <param name="minSpan" value="999"/>
            <param name="minCov" value="2"/>
            <param name="minOvlp" value="999"/>
            <param name="maxHang" value="999"/>
            <param name="intThres" value="0.7"/>
            <param name="maxGapDiff" value="999"/>
            <param name="maxBubDist" value="45000"/>
            <param name="minUtgSize" value="3"/>
            <param name="nRounds" value="2"/>
            <param name="finalDropRatio" value="0.7"/>
            <param name="paf" value="mini_reads.paf.gz"/>
            <output name="gfa" file="reads_advanced.gfa"/>
        </test>
    </tests>
    <help><![CDATA[
Miniasm is a very fast OLC-based de novo assembler for noisy long reads. It takes all-vs-all read self-mappings
(typically by minimap) as input and outputs an assembly graph in the GFA format. Different from mainstream assemblers,
miniasm does not have a consensus step. It simply concatenates pieces of read sequences to generate the final unitig
sequences. Thus the per-base error rate is similar to the raw input reads.

**Inputs**

- *PAF file* (required): the all-vs-all read self-mappings.
- *Sequence Reads* (optional): the FASTA/FASTQ reads that were mapped.

**Output**

- The assembly graph in GFA format.

    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btw152</citation>
    </citations>
</tool>