annotate mashmap.xml @ 10:08a74c1a4562 draft default tip

planemo upload
author fubar
date Sat, 24 Feb 2024 08:29:59 +0000
parents dc53eb4354a6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
53f601fb8664 planemo upload
fubar
parents: 6
diff changeset
1 <tool name="mashmap" id="mashmap" version="1.19.2" profile="22.05">
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
10
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
3 <!--Created by toolfactory@galaxy.org at 24/02/2024 19:28:59 using the Galaxy Tool Factory.-->
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
4 <description>Fast local alignment boundaries</description>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
5 <requirements>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
6 <requirement version="3.1.3" type="package">mashmap</requirement>
7
53f601fb8664 planemo upload
fubar
parents: 6
diff changeset
7 <requirement version="1.19.2" type="package">samtools</requirement>
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
8 </requirements>
7
53f601fb8664 planemo upload
fubar
parents: 6
diff changeset
9 <version_command><![CDATA[echo "1.19.2"]]></version_command>
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
10 <command><![CDATA[bash '$runme']]></command>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
11 <configfiles>
9
dc53eb4354a6 planemo upload
fubar
parents: 8
diff changeset
12 <configfile name="runme"><![CDATA[ln -s '$query' 'query' &&
dc53eb4354a6 planemo upload
fubar
parents: 8
diff changeset
13 #if len($reflist) > 1:
5
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
14 #for i, mash in enumerate($reflist):
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
15 #if i == 0:
6
27df186d5446 planemo upload
fubar
parents: 5
diff changeset
16 echo '$mash' > 'reflist' &&
5
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
17 #else:
6
27df186d5446 planemo upload
fubar
parents: 5
diff changeset
18 echo '$mash' >> 'reflist' &&
5
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
19 #end if
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
20 #end for
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
21 #end if
9
dc53eb4354a6 planemo upload
fubar
parents: 8
diff changeset
22 samtools faidx 'query' &&
5
10e4181a6443 planemo upload
fubar
parents: 4
diff changeset
23 mashmap --pi '$perc_identity' -s '$seqLength' -f '$filtermode' \
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
24 #if int($sketchSize) > 0:
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
25 -J '$sketchSize' \
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
26 #end if
10
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
27 #if $dense:
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
28 --dense \
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
29 #end if
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
30 #if len($reflist) == 1:
9
dc53eb4354a6 planemo upload
fubar
parents: 8
diff changeset
31 -r '$reflist' -q 'query' &&
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
32 #else
9
dc53eb4354a6 planemo upload
fubar
parents: 8
diff changeset
33 --rl 'reflist' -q 'query' &&
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
34 #end if
6
27df186d5446 planemo upload
fubar
parents: 5
diff changeset
35 cp 'mashmap.out' '$mashout']]></configfile>
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
36 </configfiles>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
37 <inputs>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
38 <param name="query" type="data" optional="false" label="Query sequences (as fasta) to mash against the references supplied below" help="" format="fasta" multiple="false"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
39 <param name="reflist" type="data" optional="false" label="Reference or references to mash the query sequences on" help="Choose one or more reference sequences to mash the query sequences against." format="fasta" multiple="true"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
40 <param name="perc_identity" type="float" value="85.0" label="Identity threshold" help="By default, it is set to 85, implying mappings with 85% or more identity should be reported. For example, it can be set to 80 to account for more noisy long-read datasets or 95 for mapping human genome assembly to human reference."/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
41 <param name="seqLength" type="integer" value="5000" label="Minimum segment length" help="Default is 5,000 bp. Sequences below this length are ignored. Mashmap provides guarantees on reporting local alignments of length twice this value."/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
42 <param name="sketchSize" type="integer" value="0" label="Sketch size - leave 0 for automatic setting based on the identity threshold" help="This parameter sets the seed density of the winnowing scheme, guaranteeing that the minhash will be calculated from a sample of sketchSize k-mers for each segment. It is set automatically based on --pi but can be manually set as well."/>
10
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
43 <param name="dense" type="boolean" value="false" label="Dense sketching" help="This flag will increase the seed density substantially, resulting in a density of roughly 0.02 * (1 + (1 - pi) / .05) where pi is the perc_identity threshold. This leads to longer runtimes and higher RAM usage, but significantly more accurate estimates of ANI." checked="false" truevalue="--dense" falsevalue=""/>
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
44 <param name="filtermode" type="select" label="Filter mode" help="Mashmap implements a plane-sweep based algorithm to perform the alignment filtering. Similar to delta-filter in nucmer, different filtering options are provided that are suitable for long read or assembly mapping. Option -f map is suitable for reporting the best mappings for long reads, whereas -f one-to-one is suitable for reporting orthologous mappings among all computed assembly to genome mappings.">
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
45 <option value="map">map - best mapping for long reads</option>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
46 <option value="one-to-one">one-to-one - best for mapping orthologous reads</option>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
47 <option value="none">None</option>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
48 </param>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
49 </inputs>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
50 <outputs>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
51 <data name="mashout" format="paf" label="mashmap on $query.element_identifier" hidden="false"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
52 </outputs>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
53 <tests>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
54 <test>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
55 <output name="mashout" value="mashout_sample" compare="diff" lines_diff="0"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
56 <param name="query" value="query_sample"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
57 <param name="reflist" value="reflist_sample"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
58 <param name="perc_identity" value="85.0"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
59 <param name="seqLength" value="5000"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
60 <param name="sketchSize" value="0"/>
10
08a74c1a4562 planemo upload
fubar
parents: 9
diff changeset
61 <param name="dense" value="false"/>
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
62 <param name="filtermode" value="map"/>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
63 </test>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
64 </tests>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
65 <help><![CDATA[
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
66 *MashMap* implements a fast and approximate algorithm for computing local alignment boundaries between long DNA sequences. It can be useful for mapping genome assembly or long reads (PacBio/ONT) to reference genome(s).
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
67
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
68 Given a minimum alignment length and an identity threshold for the desired local alignments, Mashmap computes alignment boundaries and identity estimates using k-mers. It does not compute the alignments explicitly, but rather estimates an unbiased k-mer based Jaccard similarity using a combination of minmers (a novel winnowing scheme) and MinHash. This is then converted to an estimate of sequence identity using the Mash distance. An appropriate k-mer sampling rate is automatically determined using the given minimum local alignment length and identity thresholds.
8
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
69
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
70 Output is in *paf* format.
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
71 This is space-delimited, with each line consisting of query name, length, 0-based start, end, strand, target name, length, start, end and mapping nucleotide identity.
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
72 Details at https://github.com/lh3/miniasm/blob/master/PAF.md
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
73
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
74 More details at the Mashmap github repository https://github.com/marbl/MashMap
9ba0184870ef planemo upload
fubar
parents: 7
diff changeset
75
2
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
76 ]]></help>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
77 <citations>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
78 <citation type="doi">10.1093/bioinformatics/btad512</citation>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
79 <citation type="doi">10.1093/bioinformatics/bts573</citation>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
80 </citations>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
81 </tool>
6c6bf2bee1ca planemo upload
fubar
parents:
diff changeset
82