comparison mashmap.xml @ 0:a3a6b0b31f2d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mashmap commit 4d07f324b25c62ef0b56b22dfff84af87d54d142
author iuc
date Mon, 26 Feb 2024 11:41:43 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a3a6b0b31f2d
1 <tool name="mashmap" id="mashmap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
2 <description>Fast local alignment boundaries</description>
3 <macros> <!-- Tokens defined here are substituted where their names appear: both in the tool version attribute above, TOOL_VERSION also in the requirement below. -->
4 <token name="@TOOL_VERSION@">3.1.3</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <requirements> <!-- The mashmap package version reuses the TOOL_VERSION token, so the wrapper version and the required package version change together. -->
8 <requirement version="@TOOL_VERSION@" type="package">mashmap</requirement>
9 </requirements>
10 <version_command>mashmap --version</version_command>
11 <!-- Builds the mashmap command line. When several references or queries are selected, their paths are written one per line to ./reflist or ./query and handed over as list files; a single dataset is passed directly. The optional numeric flags are compared against the empty string rather than tested for truthiness, so an explicitly entered 0 is still forwarded to mashmap. --> <command><![CDATA[
12 #if len($reflist) > 1:
13 #for $r in $reflist:
14 echo '$r' >> ./reflist &&
15 #end for
16 cat ./reflist &&
17 #end if
18 #if len($query) > 1:
19 #for $q in $query:
20 echo '$q' >> ./query &&
21 #end for
22 cat ./query &&
23 #end if
24 mashmap
25 --threads \${GALAXY_SLOTS:-1}
26 --perc_identity $perc_identity
27 --segLength $seqLength
28 --filter_mode $filter_mode
29 $reportPercentage
30 $dense
31 $noMerge
32 $noHgFilter
33 #if str($kmerThreshold) != '':
34 --kmerThreshold $kmerThreshold
35 #end if
36 #if str($kmerComplexity) != '':
37 --kmerComplexity $kmerComplexity
38 #end if
39 #if int($sketchSize) > 0:
40 -J $sketchSize
41 #end if
42 #if len($reflist) == 1:
43 -r '$reflist'
44 #else
45 --rl ./reflist
46 #end if
47 #if len($query) == 1:
48 -q '$query'
49 #else
50 --ql ./query
51 #end if
52
53 ]]> </command>
54 <inputs> <!-- Parameter names are referenced by the command template and by the tests, so they are kept unchanged here. -->
55 <param name="query" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true"
56 label="Query sequences to mash against the references supplied below"/>
57 <param name="reflist" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true"
58 label="Reference or references to mash the query sequences on"
59 help="Choose one or more reference sequences to mash the query sequences against."/>
60 <param argument="--perc_identity" type="float" value="85.0" label="Identity threshold"
61 help="By default, it is set to 85, implying mappings with 85 or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/>
62 <param name="seqLength" argument="--segLength" type="integer" value="5000" min="1" label="Minimum segment length"
63 help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/> <!-- The name stays seqLength for compatibility; the argument now shows the segLength flag that the command template passes. -->
64 <param argument="--sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on the identity threshold"
65 help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/>
66 <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" label="Dense sketching"
67 help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI."/>
68 <param argument="--kmerThreshold" type="float" min="0.0" max="100.0" optional="true" label="Ignore the top % most-frequent kmer window" /> <!-- The label describes a percentage, so the range is 0 to 100; the previous maximum of 0 rejected every other value. -->
69 <param argument="--kmerComplexity" type="float" min="0.0" max="1.0" optional="true" label="Threshold for kmer complexity" />
70 <param argument="--filter_mode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings.">
71 <option value="map" selected="true">map - best mapping for long reads</option>
72 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option>
73 <option value="none">None</option>
74 </param>
75 <param argument="--reportPercentage" type="boolean" truevalue="--reportPercentage" falsevalue="" checked="false"
76 label="Report predicted ANI values in [0, 100]"
77 help="instead of [0,1]" />
78 <param argument="--noMerge" type="boolean" truevalue="--noMerge" falsevalue="" checked="false"
79 label="Don't merge consecutive segment-level mappings" />
80 <param argument="--noHgFilter" type="boolean" truevalue="--noHgFilter" falsevalue="" checked="false" label="Use MashMap2 first pass filtering"
81 help="Don't use the hypergeometric filtering and instead use the MashMap2 first pass filtering." />
82 </inputs>
83 <outputs>
84 <data name="mashout" format="paf" from_work_dir="mashmap.out" /> <!-- The single output is collected from the file mashmap.out in the job working directory; the command template does not set an output path itself. -->
85 </outputs>
86 <tests>
87 <test expect_num_outputs="1"> <!-- One uncompressed FASTA query against one uncompressed FASTA reference. -->
88 <param name="query" value="query_sample.fasta" ftype="fasta"/>
89 <param name="reflist" value="reflist_sample.fasta" ftype="fasta"/>
90 <param name="perc_identity" value="85.0"/>
91 <param name="seqLength" value="5000"/>
92 <param name="sketchSize" value="0"/>
93 <param name="dense" value="true"/>
94 <param name="filter_mode" value="map"/>
95 <output name="mashout" value="mashout_sample.paf" ftype="paf"/>
96 </test>
97 <test expect_num_outputs="1"> <!-- Same settings with gzipped inputs; compared against the same expected file as the uncompressed test. -->
98 <param name="query" value="query_sample.fasta.gz" ftype="fasta.gz"/>
99 <param name="reflist" value="reflist_sample.fasta.gz" ftype="fasta.gz"/>
100 <param name="perc_identity" value="85.0"/>
101 <param name="seqLength" value="5000"/>
102 <param name="sketchSize" value="0"/>
103 <param name="dense" value="true"/>
104 <param name="filter_mode" value="map"/>
105 <output name="mashout" value="mashout_sample.paf" ftype="paf"/>
106 </test>
107 <test expect_num_outputs="1"> <!-- Two queries (the same file listed twice) and two references, given as comma-separated values so that more than one dataset is selected for each input. -->
108 <param name="query" value="query_sample.fasta.gz,query_sample.fasta.gz" ftype="fasta.gz"/>
109 <param name="reflist" value="reflist_1_sample.fasta.gz,reflist_2_sample.fasta.gz" ftype="fasta.gz"/>
110 <param name="perc_identity" value="85.0"/>
111 <param name="seqLength" value="5000"/>
112 <param name="sketchSize" value="0"/>
113 <param name="dense" value="true"/>
114 <param name="filter_mode" value="map"/>
115 <output name="mashout" value="mashout_multi_sample.paf" ftype="paf"/>
116 </test>
117 </tests>
118 <help><![CDATA[
119 *MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences.
120 It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s).
121
122 Given a minimum alignment length and an identity threshold for the desired local alignments,
123 Mashmap computes alignment boundaries and identity estimates using k-mers. It does not compute the alignments explicitly,
124 but rather estimates an unbiased k-mer based Jaccard similarity using a combination of minmers (a novel winnowing scheme) and MinHash.
125 This is then converted to an estimate of sequence identity using the Mash distance. An appropriate k-mer sampling rate
126 is automatically determined using the given minimum local alignment length and identity thresholds.
127
128 As an example, Mashmap can map a human genome assembly to the human reference genome in about one minute total execution
129 time and < 4 GB memory using just 8 CPU threads, achieving more than an order of magnitude improvement in both runtime and
130 memory over alternative methods. We describe the algorithms associated with Mashmap, and report on speed, scalability, and
131 accuracy of the software in the publications listed below. Unlike traditional mappers, MashMap does not compute exact sequence alignments.
132 In the future, we plan to add optional alignment support to generate base-to-base alignments.
133
134 The output is space-delimited with each line consisting of query name, length, 0-based start, end, strand, target name,
135 length, start, end and mapping nucleotide identity.
136
137 ]]></help>
138 <citations> <!-- NOTE(review): the help text points readers to the publications listed here; confirm that both DOIs resolve to MashMap publications. -->
139 <citation type="doi">10.1093/bioinformatics/btad512</citation>
140 <citation type="doi">10.1093/bioinformatics/bts573</citation>
141 </citations>
142 </tool>
143