Mercurial > repos > iuc > mummer_nucmer
view nucmer.xml @ 0:a18fb4f826fc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mummer4 commit 8133565adbfc012fa54b96449c2a18d044049107
author | iuc |
---|---|
date | Wed, 05 Dec 2018 02:37:36 -0500 |
parents | |
children | 5b0b49b5421c |
line wrap: on
line source
<!--
    Galaxy wrapper for MUMmer's nucmer aligner.

    The command aligns a query FASTA against a reference FASTA with nucmer and,
    when the user asks for it, renders a PNG dotplot of the resulting delta
    file with mummerplot. Shared pieces (version token, base requirements, the
    common mummerplot parameters and the citation) are imported from macros.xml.
-->
<tool id="mummer_nucmer" name="Nucmer" version="@MUMMER_VERSION@">
    <description>Align two or more sequences</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <!-- Extra dependency for the optional mummerplot step, which plots through gnuplot. -->
        <requirement type="package" version="5.2.3">gnuplot</requirement>
    </expand>
    <command detect_errors="exit_code">
        <![CDATA[
        ## Give the datasets stable names in the working directory. The paths are
        ## quoted so that unusual characters cannot split or glob the arguments.
        ln -s '$reference_sequence' reference.fa &&
        ln -s '$query_sequence' query.fa &&
        nucmer
            $anchoring
            -b '$breaklen'
            -c '$mincluster'
            -D '$diagdiff'
            -d '$diagfactor'
            $noextend
            $direction
            -g '$maxgap'
            -l '$minmatch'
            -L '$minalign'
            $nooptimize
            $nosimplify
            --threads "\${GALAXY_SLOTS:-1}"
            #if $options.advanced == 'enable':
                $options.banded
                $options.large
                $options.genome
                -M '$options.max_chunk'
            #end if
            'reference.fa' 'query.fa'
        #if $mumplot.plot == 'yes':
            && mummerplot
                #if $mumplot.sequences.seq_input == 'yes':
                    ## Use the datasets chosen inside the "sequences" conditional,
                    ## not the top-level alignment inputs of the same name.
                    -R '$mumplot.sequences.reference_sequence'
                    -Q '$mumplot.sequences.query_sequence'
                    $mumplot.sequences.layout
                #end if
                -b '$mumplot.breaklen'
                $mumplot.color
                $mumplot.coverage
                $mumplot.filter
                $mumplot.fat
                #if $mumplot.labels.IDs == 'yes':
                    -IdR '$mumplot.labels.ref_id'
                    -IdQ '$mumplot.labels.query_id'
                #end if
                -s '$mumplot.size'
                -terminal png
                -title '$mumplot.title'
                $mumplot.snp
                #if $mumplot.range.custom == 'yes':
                    ## Quoted: an unquoted [min:max] is a shell glob pattern.
                    -x '[${mumplot.range.min_x}:${mumplot.range.max_x}]'
                    -y '[${mumplot.range.min_y}:${mumplot.range.max_y}]'
                #end if
                'out.delta'
        #end if
        ]]>
    </command>
    <inputs>
        <!-- Alignment inputs -->
        <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" />
        <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" />
        <!-- An empty option value adds no flag, leaving nucmer's built-in behaviour in place. -->
        <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy">
            <option value="">Use default</option>
            <option value="--mum">Unique matches only (--mum)</option>
            <option value="--maxmatch">All matches (--maxmatch)</option>
        </param>
        <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length" help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up. (-b)" />
        <param name="mincluster" type="integer" argument="-c" value="65" label="Minimum Cluster Length" help="Sets the minimum length of a cluster of matches. (-c)" />
        <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference" help="Set the maximum diagonal difference between two adjacent anchors in a cluster. (-D)" />
        <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference Factor" help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length. (-d)" />
        <param name="noextend" type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step. (--noextend)" />
        <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use">
            <option value="">Use forward and reverse sequences</option>
            <option value="-f">Use only forward sequence of query (-f)</option>
            <option value="-r">Use only reverse sequence of query (-r)</option>
        </param>
        <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster. (-g)" />
        <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match. (-l)" />
        <param name="minalign" type="integer" argument="-L" value="0" label="Minimum Alignment Length" help="Minimum length of an alignment, after clustering and extension. (-L)" />
        <param name="nooptimize" type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization" help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" />
        <param name="nosimplify" type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments" help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats. (--nosimplify)" />
        <!-- Rarely needed nucmer switches; only emitted when "enable" is selected. -->
        <conditional name="options">
            <param name="advanced" type="select" label="Additional options">
                <option value="defaults">Use defaults</option>
                <option value="enable">Select additional options</option>
            </param>
            <when value="enable">
                <param name="banded" type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding" help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" />
                <param name="large" type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" />
                <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" />
                <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" />
            </when>
            <when value="defaults" />
        </conditional>
        <!-- Optional dotplot; the shared mummerplot parameters come from the mumplot_input macro. -->
        <conditional name="mumplot">
            <param name="plot" type="select" label="Do you want to output a 2-D dotplot of the input sequences? (mummerplot)">
                <option value="yes">YES</option>
                <option value="no">NO</option>
            </param>
            <when value="yes">
                <expand macro="mumplot_input">
                    <conditional name="sequences">
                        <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?">
                            <option value="no">NO</option>
                            <option value="yes">YES</option>
                        </param>
                        <when value="yes">
                            <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" />
                            <!-- NOTE(review): declared multiple="True" but passed as one -Q argument; confirm mummerplot copes with several datasets. -->
                            <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" />
                            <param name="layout" type="boolean" argument="--layout" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion. (--layout)" />
                        </when>
                        <when value="no" />
                    </conditional>
                </expand>
            </when>
            <when value="no" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Both files are collected from the job working directory by name. -->
        <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: alignment" />
        <data name="png_output" format="png" from_work_dir="out.png" label="${tool.name} on ${on_string}: plot">
            <filter>mumplot['plot'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default alignment plus a dotplot that uses the -R / -Q sequence inputs. -->
        <test>
            <param name="advanced" value="defaults" />
            <param name="plot" value="yes" />
            <param name="seq_input" value="yes" />
            <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta" />
            <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" />
            <output name="delta_output" ftype="tabular" compare="diff" lines_diff="2" value="nucmer.txt" />
            <output name="png_output" ftype="png" compare="sim_size" value="plot.png" />
        </test>
    </tests>
    <help><![CDATA[
nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications.

All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that are unique in the reference sequence by default, use different Anchoring options to change this behavior.

**Options:**::

    Defaults in parentheses

    nucmer

    --mum           Use anchor matches that are unique in both the reference and query (false)

    --maxmatch      Use all anchor matches regardless of their uniqueness (false)

    -b              Set the distance an alignment extension will attempt to extend poor scoring
                    regions before giving up (200)

    -c              Sets the minimum length of a cluster of matches (65)

    -D              Set the maximum diagonal difference between two adjacent anchors in a
                    cluster (5)

    -d              Set the maximum diagonal difference between two adjacent anchors in a
                    cluster as a differential fraction of the gap length (0.12)

    --noextend      Do not perform cluster extension step (false)

    -f              Use only the forward strand of the Query sequences (false)

    -r              Use only the reverse complement of the Query sequences (false)

    -g              Set the maximum gap between two adjacent matches in a cluster (90)

    -l              Set the minimum length of a single exact match (20)

    -L              Minimum length of an alignment, after clustering and extension (0)

    --nooptimize    No alignment score optimization, i.e. if an alignment extension reaches the
                    end of a sequence, it will not backtrack to optimize the alignment score and
                    instead terminate the alignment at the end of the sequence (false)

    --nosimplify    Don't simplify alignments by removing shadowed clusters. Use this option when
                    aligning a sequence to itself to look for repeats (false)

    --banded        Enforce absolute banding of dynamic programming matrix based on diagdiff
                    parameter (false)

    --large         Force the use of large offsets (false)

    -G              Map genome to genome (long query sequences) (false)

    -M              Max chunk. Stop adding sequence for a thread if more than MAX already. (50000)

    mummerplot

    -b              Highlight alignments with breakpoints further than breaklen nucleotides from
                    the nearest sequence end

    -color          Color plot lines with a percent similarity gradient or turn off all plot color
                    (default color by match dir) If the plot is very sparse, edit the .gp script
                    to plot with 'linespoints' instead of 'lines'

    -c              Generate a reference coverage plot (default for .tiling)

    --filter        Only display .delta alignments which represent the "best" hit to any
                    particular spot on either sequence, i.e. a one-to-one mapping of reference
                    and query subsequences

    --fat           Layout sequences using fattest alignment only

    -IdR            Plot a particular reference sequence ID on the X-axis

    -IdQ            Plot a particular query sequence ID on the Y-axis

    -s              Set the output size to small, medium or large (--small) (--medium) (--large)
                    (default 'small')

    --SNP           Highlight SNP locations in each alignment

    -title          Specify the gnuplot plot title (default none)

    -x              Set the xrange for the plot '[min:max]'

    -y              Set the yrange for the plot '[min:max]'

    -R              Plot an ordered set of reference sequences from Rfile

    -Q              Plot an ordered set of query sequences from Qfile

    --layout        Layout a .delta multiplot in an intelligible fashion, this option requires
                    the -R -Q options
    ]]></help>
    <expand macro="citation" />
</tool>