Mercurial > repos > iuc > mummer_nucmer

<tool id="mummer_nucmer" name="Nucmer" version="@MUMMER_VERSION@">
    <description>Align two or more sequences</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="5.2.3">gnuplot</requirement>
    </expand>
    <command detect_errors="exit_code">
        <![CDATA[
        ln -s $reference_sequence reference.fa &&
        ln -s $query_sequence query.fa &&
        nucmer
            $anchoring
            -b '$breaklen'
            -c '$mincluster'
            -D '$diagdiff'
            -d '$diagfactor'
            $noextend
            $direction
            -g '$maxgap'
            -l '$minmatch'
            -L '$minalign'
            $nooptimize
            $nosimplify
            --threads "\${GALAXY_SLOTS:-1}"
            #if $options.advanced == 'enable':
                $options.banded
                $options.large
                $options.genome
                -M '$options.max_chunk'
            #end if
            'reference.fa' 'query.fa'
            #if $mumplot.plot == 'yes':
                && mummerplot
                    #if $mumplot.sequences.seq_input == 'yes':
                        -R '$reference_sequence'
                        -Q '$query_sequence'
                        $mumplot.sequences.layout
                    #end if
                    -b '$mumplot.breaklen'
                    $mumplot.color
                    $mumplot.coverage
                    $mumplot.filter
                    $mumplot.fat
                    #if $mumplot.labels.IDs == 'yes':
                        -IdR '$mumplot.labels.ref_id'
                        -IdQ '$mumplot.labels.query_id'
                    #end if
                    -s '$mumplot.size'
                    -terminal png
                    -title '$mumplot.title'
                    $mumplot.snp
                    #if $mumplot.range.custom == 'yes':
                        -x [$mumplot.range.min_x:$mumplot.range.max_x]
                        -y [$mumplot.range.min_y:$mumplot.range.max_y]
                    #end if
                    'out.delta'
            #end if
        ]]>
    </command>
    <inputs>
        <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" />
        <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" />
        <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy">
            <option value="">Use default</option>
            <option value="--mum">Unique matches only (--mum)</option>
            <option value="--maxmatch">All matches (--maxmatch)</option>
        </param>
        <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length"
            help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up. (-b)" />
        <param name="mincluster" type="integer" argument="-c" value="65" label="Minumum Cluster Length" help="Sets the minimum length of a cluster of matches. (-c)" />
        <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference"
            help="Set the maximum diagonal difference between two adjacent anchors in a cluster. (-D)" />
        <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference"
            help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length. (-d)" />
        <param name="noextend" type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step. (--noextend)" />
        <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use">
            <option value="">Use foward and reverse sequences</option>
            <option value="-f">Use only forward sequence of query (-f)</option>
            <option value="-r">Use only reverese sequence of query (-r)</option>
        </param>
        <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster. (-g)" />
        <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match. (-l)" />
        <param name="minalign" type="integer" argument="-L" value="0" label="Minumum Alignment Length" help="Minimum length of an alignment, after clustering and extension. (-L)" />
        <param name="nooptimize" type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization"
            help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" />
        <param name="nosimplify" type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments"
            help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats. (--nosimplify)" />
        <conditional name="options">
            <param name="advanced" type="select" label="Additional options">
                <option value="defaults">Use defaults</option>
                <option value="enable">Select additional options</option>
            </param>
            <when value="enable">
                <param name="banded" type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding"
                    help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" />
                <param name="large" type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" />
                <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" />
                <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" />
            </when>
            <when value="defaults" />
        </conditional>
        <conditional name="mumplot" >
            <param name="plot" type="select" label="Do you want to output a 2-D dotplot of the input sequences? (mummerplot)" >
                <option value="yes">YES</option>
                <option value="no">NO</option>
            </param>
            <when value="yes" >
                <expand macro="mumplot_input" >
                    <conditional name="sequences" >
                        <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" >
                            <option value="no">NO</option>
                            <option value="yes">YES</option>
                        </param>
                        <when value="yes">
                            <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" />
                            <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" />
                            <param name="layout" type="boolean" argument="--layout" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion. (--layout)" />
                        </when>
                        <when value="no" />
                    </conditional>
                </expand>
            </when>
            <when value="no" />
        </conditional>
    </inputs>
    <outputs>
        <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: alignment" />
        <data name="png_output" format="png" from_work_dir="out.png" label="${tool.name} on ${on_string}: plot" >
            <filter>mumplot['plot'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="advanced" value="defaults" />
            <param name="plot" value="yes" />
            <param name="seq_input" value="yes" />
            <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/>
            <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" />
            <output name="delta_output" ftype="tabular" compare="diff" lines_diff="2" value="nucmer.txt"/>
            <output name="png_output" ftype="png" compare="sim_size" value="plot.png" />
        </test>
    </tests>
    <help><![CDATA[
        nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications.

        All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that are unique in the reference sequence by default, use different Anchoring options to change this behavior.

**Options:**::

    Defaults in parentheses

    nucmer

    --mum             Use anchor matches that are unique in both the reference and query (false)

    --maxmatch        Use all anchor matches regardless of their uniqueness (false)

    -b                Set the distance an alignment extension will attempt to extend poor scoring regions
                      before giving up (200)

    -c                Sets the minimum length of a cluster of matches (65)

    -D                Set the maximum diagonal difference between two adjacent anchors in a cluster (5)

    -d                Set the maximum diagonal difference between two adjacent anchors in a cluster as a
                      differential fraction of the gap length (0.12)

    --noextend        Do not perform cluster extension step (false)

    -f                Use only the forward strand of the Query sequences (false)

    -r                Use only the reverse complement of the Query sequences (false)

    -g                Set the maximum gap between two adjacent matches in a cluster (90)

    -l                Set the minimum length of a single exact match (20)

    -L                Minimum length of an alignment, after clustering and extension (0)

    --nooptimize      No alignment score optimization, i.e. if an alignment extension reaches the end of a
                      sequence, it will not backtrack to optimize the alignment score and instead terminate
                      the alignment at the end of the sequence (false)

    --nosimplify      Don't simplify alignments by removing shadowed clusters. Use this option when aligning
                      a sequence to itself to look for repeats (false)

    --banded          Enforce absolute banding of dynamic programming matrix based on diagdiff parameter (false)

    --large           Force the use of large offsets (false)

    -G                Map genome to genome (long query sequences) (false)

    -M                Max chunk. Stop adding sequence for a thread if more than MAX already. (50000)

    mummerplot

    -b             Highlight alignments with breakpoints further than breaklen nucleotides from the nearest
                   sequence end

    -color         Color plot lines with a percent similarity gradient or turn off all plot color (default
                   color by match dir) If the plot is very sparse, edit the .gp script to plot with
                   'linespoints' instead of 'lines'

    -c             Generate a reference coverage plot (default for .tiling)

    --filter       Only display .delta alignments which represent the "best" hit to any particular spot on
                   either sequence, i.e. a one-to-one mapping of reference and query subsequences

    --fat          Layout sequences using fattest alignment only

    -IdR           Plot a particular reference sequence ID on the X-axis

    -IdQ           Plot a particular query sequence ID on the Y-axis

    -s             Set the output size to small, medium or large (--small) (--medium) (--large) (default 'small')

    --SNP          Highlight SNP locations in each alignment

    -title         Specify the gnuplot plot title (default none)

    -x             Set the xrange for the plot '[min:max]'

    -y             Set the yrange for the plot '[min:max]'

    -R             Plot an ordered set of reference sequences from Rfile

    -Q             Plot an ordered set of query sequences from Qfile

    --layout       Layout a .delta multiplot in an intelligible fashion, this option requires the -R -Q options

    ]]></help>
        <expand macro="citation" />
</tool>
author	iuc
date	Wed, 05 Dec 2018 02:37:36 -0500
parents
children	5b0b49b5421c