mashmap: mashmap.xml annotate

annotate mashmap.xml @ 6:27df186d5446 draft

planemo upload

author	fubar
date	Sat, 24 Feb 2024 04:11:27 +0000
parents	10e4181a6443
children	53f601fb8664

rev	line source
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	1 <tool name="mashmap" id="mashmap" version="3.1.3" profile="22.05">
6c6bf2bee1ca planemo upload fubar parents: diff changeset	2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	3 <!--Created by toolfactory@galaxy.org at 23/02/2024 21:34:16 using the Galaxy Tool Factory.-->
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	4 <description>Fast local alignment boundaries</description>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	5 <requirements>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	6 <requirement version="3.1.3" type="package">mashmap</requirement>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	7 </requirements>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	8 <version_command><![CDATA[echo "3.1.3"]]></version_command>
<!-- Entry point: runs the generated "runme" configfile as a bash script. -->
6c6bf2bee1ca planemo upload fubar parents: diff changeset	9 <command><![CDATA[bash '$runme']]></command>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	10 <configfiles>
<!-- "runme" is a Cheetah-templated bash script. With more than one reference selected, each reference path is written on its own line to a local file named 'reflist', which is handed to mashmap through its reference-list option; with exactly one reference, the path is passed directly with -r. The -J option is only emitted when sketchSize is greater than 0. On success, 'mashmap.out' (presumably mashmap's default output file; confirm against the mashmap documentation) is copied to the mashout dataset. -->
5 10e4181a6443 planemo upload fubar parents: 4 diff changeset	11 <configfile name="runme"><![CDATA[#if len($reflist) > 1:
10e4181a6443 planemo upload fubar parents: 4 diff changeset	12 #for i, mash in enumerate($reflist):
10e4181a6443 planemo upload fubar parents: 4 diff changeset	13 #if i == 0:
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	14 echo '$mash' > 'reflist' &&
5 10e4181a6443 planemo upload fubar parents: 4 diff changeset	15 #else:
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	16 echo '$mash' >> 'reflist' &&
5 10e4181a6443 planemo upload fubar parents: 4 diff changeset	17 #end if
10e4181a6443 planemo upload fubar parents: 4 diff changeset	18 #end for
10e4181a6443 planemo upload fubar parents: 4 diff changeset	19 #end if
10e4181a6443 planemo upload fubar parents: 4 diff changeset	20 mashmap --pi '$perc_identity' -s '$seqLength' -f '$filtermode' $dense \
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	21 #if int($sketchSize) > 0:
6c6bf2bee1ca planemo upload fubar parents: diff changeset	22 -J '$sketchSize' \
6c6bf2bee1ca planemo upload fubar parents: diff changeset	23 #end if
6c6bf2bee1ca planemo upload fubar parents: diff changeset	24 #if len($reflist) == 1:
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	25 -r '$reflist' -q '$query' &&
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	26 #else
5 10e4181a6443 planemo upload fubar parents: 4 diff changeset	27 --rl 'reflist' -q '$query' &&
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	28 #end if
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	29 cp 'mashmap.out' '$mashout']]></configfile>
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	30 </configfiles>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	31 <inputs>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	32 <param name="query" type="data" optional="false" label="Query sequences (as fasta) to mash against the references supplied below" help="" format="fasta" multiple="false"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	33 <param name="reflist" type="data" optional="false" label="Reference or references to mash the query sequences on" help="Choose one or more reference sequences to mash the query sequences against." format="fasta" multiple="true"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	34 <param name="perc_identity" type="float" value="85.0" label="Identity threshold" help="By default, it is set to 85, implying mappings with 85 or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	35 <param name="seqLength" type="integer" value="5000" label="Minimum segment length" help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	36 <param name="sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on the identity threshold" help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/>
4 fba99cb9b0ef planemo upload fubar parents: 3 diff changeset	37 <param name="dense" type="select" label="Dense sketching" help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI.">
fba99cb9b0ef planemo upload fubar parents: 3 diff changeset	38 <option value="">No dense sketching</option>
fba99cb9b0ef planemo upload fubar parents: 3 diff changeset	39 <option value="--dense">Dense sketching</option>
3 aa2234f3b23a planemo upload fubar parents: 2 diff changeset	40 </param>
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	41 <param name="filtermode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings.">
6c6bf2bee1ca planemo upload fubar parents: diff changeset	42 <option value="map">map - best mapping for long reads</option>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	43 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	44 <option value="none">None</option>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	45 </param>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	46 </inputs>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	47 <outputs>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	48 <data name="mashout" format="paf" label="mashmap on $query.element_identifier" hidden="false"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	49 </outputs>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	50 <tests>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	51 <test>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	52 <output name="mashout" value="mashout_sample" compare="diff" lines_diff="0"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	53 <param name="query" value="query_sample"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	54 <param name="reflist" value="reflist_sample"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	55 <param name="perc_identity" value="85.0"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	56 <param name="seqLength" value="5000"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	57 <param name="sketchSize" value="0"/>
4 fba99cb9b0ef planemo upload fubar parents: 3 diff changeset	58 <param name="dense" value=""/>
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	59 <param name="filtermode" value="map"/>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	60 </test>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	61 </tests>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	62 <help><![CDATA[
6c6bf2bee1ca planemo upload fubar parents: diff changeset	63 MashMap implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences. It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s).
6c6bf2bee1ca planemo upload fubar parents: diff changeset	64
6c6bf2bee1ca planemo upload fubar parents: diff changeset	65 Given a minimum alignment length and an identity threshold for the desired local alignments, Mashmap computes alignment boundaries and identity estimates using k-mers. It does not compute the alignments explicitly, but rather estimates an unbiased k-mer based Jaccard similarity using a combination of minmers (a novel winnowing scheme) and MinHash. This is then converted to an estimate of sequence identity using the Mash distance. An appropriate k-mer sampling rate is automatically determined using the given minimum local alignment length and identity thresholds.
6c6bf2bee1ca planemo upload fubar parents: diff changeset	66
6c6bf2bee1ca planemo upload fubar parents: diff changeset	67 As an example, Mashmap can map a human genome assembly to the human reference genome in about one minute total execution time and < 4 GB memory using just 8 CPU threads, achieving more than an order of magnitude improvement in both runtime and memory over alternative methods. We describe the algorithms associated with Mashmap, and report on speed, scalability, and accuracy of the software in the publications listed below. Unlike traditional mappers, MashMap does not compute exact sequence alignments. In future, we plan to add an optional alignment support to generate base-to-base alignments.
6c6bf2bee1ca planemo upload fubar parents: diff changeset	68
6c6bf2bee1ca planemo upload fubar parents: diff changeset	69 Map a set of query sequences against a reference genome:
6c6bf2bee1ca planemo upload fubar parents: diff changeset	70
6c6bf2bee1ca planemo upload fubar parents: diff changeset	71 mashmap -r reference.fna -q query.fa
6c6bf2bee1ca planemo upload fubar parents: diff changeset	72
6 27df186d5446 planemo upload fubar parents: 5 diff changeset	73 The output is a paf format file (https://github.com/lh3/miniasm/blob/master/PAF.md).
27df186d5446 planemo upload fubar parents: 5 diff changeset	74 This is space-delimited with each line consisting of query name, length, 0-based start, end, strand, target name, length, start, end and mapping nucleotide identity.
2 6c6bf2bee1ca planemo upload fubar parents: diff changeset	75
6c6bf2bee1ca planemo upload fubar parents: diff changeset	76 Map a set of query sequences against a list of reference genomes:
6c6bf2bee1ca planemo upload fubar parents: diff changeset	77
6c6bf2bee1ca planemo upload fubar parents: diff changeset	78 mashmap --rl referenceList.txt -q query.fa
6c6bf2bee1ca planemo upload fubar parents: diff changeset	79
6c6bf2bee1ca planemo upload fubar parents: diff changeset	80 File 'referenceList.txt' containing the list of reference genomes should contain the path to each reference genome, one per line.
6c6bf2bee1ca planemo upload fubar parents: diff changeset	81
6c6bf2bee1ca planemo upload fubar parents: diff changeset	82 Source code: https://github.com/marbl/MashMap
6c6bf2bee1ca planemo upload fubar parents: diff changeset	83 ]]></help>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	84 <citations>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	85 <citation type="doi">10.1093/bioinformatics/btad512</citation>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	86 <citation type="doi">10.1093/bioinformatics/bts573</citation>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	87 </citations>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	88 </tool>
6c6bf2bee1ca planemo upload fubar parents: diff changeset	89

Mercurial > repos > fubar > mashmap

annotate mashmap.xml @ 6:27df186d5446 draft