comparison smalt_map.xml @ 0:51ad86498414 draft

planemo upload for repository https://sourceforge.net/projects/smalt/ commit dad1050d2043119952eb284fcd089519f28e4255
author nml
date Wed, 27 Sep 2017 16:03:01 -0400
parents
children fae9ec82e10f
comparison
equal deleted inserted replaced
-1:000000000000 0:51ad86498414
1 <tool id="smalt" name="smalt" version="1.0.0" >
2 <description>Map query reads (FASTA/FASTQ format) onto the reference sequences</description>
3 <requirements>
4 <requirement type="package" version="0.7.6">smalt</requirement>
5 <requirement type="package" version="1.5">samtools</requirement>
6 </requirements>
7 <stdio> <!-- Failure detection: any non-zero exit code, plus known error markers in the output streams, marks the job as failed. -->
8 <exit_code range="1:" level="fatal" description="Unknown error" />
9 <regex match="Command line error"
10 source="stdout"
11 level="fatal"
12 description="The tool reported a command line error; check the selected parameter values" />
13 <regex match="ERROR"
14 source="stderr"
15 level="fatal"
16 description="The tool reported an error on standard error" />
17 </stdio>
18 <command>
19 <![CDATA[
20 ## Step 1: build the smalt index, named 'temp', from the selected reference
21 smalt index
22
23 #if $k:
24 -k "$k"
25 #end if
26
27 #if $s:
28 -s "$s"
29 #end if
30
31 'temp' '$reference' &&
32 ## Step 2: map the reads against the 'temp' index (dataset paths are single-quoted throughout)
33 smalt map
34
35 -o '$output'
36 ## -f receives the format name, followed by ':' and the selected options when any are set
37 #if $oformat.outformat == "sam":
38 #if $oformat.samOptions:
39 -f "$oformat.outformat:$oformat.samOptions"
40 #else
41 -f "$oformat.outformat"
42 #end if
43 #elif $oformat.outformat == "bam":
44 #if $oformat.bamOptions:
45 -f "$oformat.outformat:$oformat.bamOptions"
46 #else
47 -f "$oformat.outformat"
48 #end if
49 #else
50 -f "$oformat.outformat"
51 #end if
52
53
54 ## thread count is taken from GALAXY_SLOTS, falling back to 2 when it is unset
55 -n \${GALAXY_SLOTS:-2}
56 ## the pair-type flag is only passed for the two paired library types
57 #if $singlePaired.sPaired != "single":
58 -l $singlePaired.pairtype
59 #end if
60
61 ## optional settings: each flag is emitted only when its parameter is set
62 #if $mincover:
63 -c "$mincover"
64 #end if
65
66 #if $scordiff:
67 -d "$scordiff"
68 #end if
69
70 #if $insfil:
71 -g '$insfil'
72 #end if
73
74 #if $insertmax:
75 -i "$insertmax"
76 #end if
77
78 #if $insertmin:
79 -j "$insertmin"
80 #end if
81
82 #if $minscor:
83 -m "$minscor"
84 #end if
85
86 #if $minbasq:
87 -q "$minbasq"
88 #end if
89
90 #if $seed:
91 -r "$seed"
92 #end if
93
94 #if $sw_weighted:
95 -w
96 #end if
97
98 #if $search_harder:
99 -x
100 #end if
101
102 #if $minid:
103 -y "$minid"
104 #end if
105
106 ## positional arguments: the index name, then the read file(s)
107 'temp'
108
109 #if $singlePaired.sPaired == "single":
110 '$singlePaired.sInput1'
111 #elif $singlePaired.sPaired == "paired":
112 '$singlePaired.pInput1' '$singlePaired.pInput2'
113 #elif $singlePaired.sPaired == "collections":
114 '$singlePaired.fastq_collection.forward' '$singlePaired.fastq_collection.reverse'
115 #end if
116
117 ## BAM output only: sort with samtools, then move the sorted file back onto the output path
118 #if $oformat.outformat == "bam":
119 && samtools sort -@ \${GALAXY_SLOTS:-1} '$output' -o sorted && mv sorted '$output'
120 #end if
121
122
123 ]]>
124 </command>
125
126
127 <inputs>
128 <conditional name="singlePaired"> <!-- Library type selector; each <when> branch declares the read inputs for that library type. -->
129 <param name="sPaired" type="select" label="What is the library type?">
130 <option value="single">Single-end</option>
131 <option value="paired">Paired-end</option>
132 <option value="collections">Paired-end Collections</option>
133 </param>
134 <when value="single"> <!-- One read file; no pair type is offered in this branch. -->
135 <param name="sInput1" type="data" format="fastq" label="Single end illumina fastq file" optional="false"/>
136 </when>
137 <when value="paired"> <!-- Two separate read files plus the pair type passed to the -l flag. -->
138 <param name="pInput1" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/>
139 <param name="pInput2" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file"/>
140 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library">
141 <option value="pe">Illumina paired-end (short inserts)</option>
142 <option value="mp">Illumina mate-pair library (long inserts)</option>
143 <option value="pp">Mate-pair sequenced on the same strand</option>
144 </param>
145 </when>
146 <when value="collections"> <!-- Same pair-type choices as the "paired" branch; the reads come from one paired collection whose forward and reverse members are used by the command. -->
147 <param name="fastq_collection" type="data_collection" label="Paired-end Fastq collection" help="" optional="false" format="txt" collection_type="paired" />
148 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library">
149 <option value="pe">Illumina paired-end (short inserts)</option>
150 <option value="mp">Illumina mate-pair library (long inserts)</option>
151 <option value="pp">Mate-pair sequenced on the same strand</option>
152 </param>
153 </when>
154
155 </conditional>
156
157
158
159 <!-- reference genome and the settings used when building its index (k-mer size, step size) -->
160 <param name="reference" type="data" format="fasta" label="Select fasta reference"/>
161 <param name="k" type="integer" value="13" label="K-mer size" help="Specifies the word length, an integer between 3 and 20. The default word length is 13." max="20" min="3"/>
162 <param name="s" type="integer" optional="true" label="Step size" help="Specifies how many bases are skipped between indexed words."/>
163
164 <!-- mapping thresholds; when a field is left empty the corresponding flag is omitted from the command -->
165 <param name="mincover" type="text" label="Mincover" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent"/>
166 <param name="scordiff" type="text" label="Scordiff" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score"/>
167 <conditional name="oformat"> <!-- Output format selector; the sam and bam branches add optional modifiers that the command appends to the -f value after a colon. -->
168 <param name="outformat" type="select" label="Format" help="">
169 <option value="cigar">cigar</option>
170 <option value="sam" selected="true">sam</option>
171 <option value="ssaha">ssaha</option>
172 <option value="bam">bam</option>
173 </param>
174 <when value="sam">
175 <param name="samOptions" type="select" display="checkboxes" label="Sam Options" multiple="true">
176 <option value="nohead">No Header</option>
177 <option value="clip">Hard Clip</option>
178 </param>
179 </when>
180 <when value="bam">
181 <param name="bamOptions" type="select" display="checkboxes" label="Bam Options" multiple="true">
182 <option value="clip">Hard Clip</option>
183 </param>
184 </when>
185 <when value="cigar"> <!-- no additional options for this format -->
186 </when>
187 <when value="ssaha"> <!-- no additional options for this format -->
188 </when>
189 </conditional>
190 <param name="insfil" type="data" optional="true" label="Distribution insert sizes " help="Use the distribution of insert sizes stored in this file. The file is in ASCII format and can be generated using the 'sample' task (see 'smalt sample -H' for help)." format="txt"/>
191 <param name="insertmax" type="text" label="Maximum insert size (only in paired-end mode). " help="Maximum insert size (only in paired-end mode). The default is 500."/>
192 <param name="insertmin" type="text" label="Minimum insert size (only in paired-end mode). " help="Minimum insert size (only in paired-end mode). The default is 0."/>
193 <!-- The free-text settings below are optional: the command only adds a flag when its field is non-empty. -->
194
195 <param name="minscor" type="text" label="Sets an absolute threshold of the Smith-Waterman scores." help="Mappings with scores below that threshold will not be reported. The default is &#060; minscor &#062; = &#060; wordlen &#062; + &#060; stepsiz &#062; - 1"/>
196
197 <param name="minbasq" type="text" label="Sets a base quality threshold (0 &#060;= minbasq &#060;= 10, default 0)" help="K-mer words of the read with nucleotides that have a base quality below this threshold are not looked up in the hash index."/>
198
199 <param name="seed" type="text" label="If there are multiple mappings with the same best alignment score report one picked at random." help="An integer >= 0 used to seed the pseudo-random generator."/>
200
201 <param name="sw_weighted" type="boolean" label="Smith-Waterman scores are complexity weighted."/>
202
203 <param name="search_harder" type="boolean" label="This flag triggers a more exhaustive search for alignments at the cost of decreased speed" help="In paired-end mode each mate is mapped independently. (By default the mate with fewer hits in the hash index is mapped first and the vicinity is searched for mappings of its mate.)"/>
204
205 <param name="minid" type="text" label="Sets an identity threshold for a mapping to be reported (default: 0)." help="specifies the number of exactly matching nucleotides either as a positive integer or as a fraction of the read length (&#060;= 1.0)."/>
206 </inputs>
207
208 <outputs>
209 <data name="output" format="cigar" > <!-- Declared as cigar; change_format below switches the datatype to match the selected oformat.outformat value. -->
210 <change_format>
211 <when input="oformat.outformat" value="cigar" format="cigar"/>
212 <when input="oformat.outformat" value="sam" format="sam"/>
213 <when input="oformat.outformat" value="ssaha" format="ssaha"/>
214 <when input="oformat.outformat" value="bam" format="bam"/>
215 </change_format>
216 </data>
217 </outputs>
218 <tests>
219 <test> <!-- Paired-end mapping in SAM format; passes when the output contains the expected reference sequence name. -->
220 <param name="sPaired" value="paired"/>
221 <param name="pInput1" value="ecoli_1K_1.fq"/>
222 <param name="pInput2" value="ecoli_1K_2.fq"/>
223 <param name="pairtype" value="pe"/>
224
225 <param name="reference" value="contigs.fasta"/>
226 <param name="outformat" value="sam"/>
227 <output name="output">
228 <assert_contents>
229 <has_text text="SN:NODE_1_length_1000_cov_140.620106" />
230 </assert_contents>
231 </output>
232 </test>
233 </tests>
234 <help>
235
236 **What it does**
237
238 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.
239
240
241 ------
242
243
244 **Know what you are doing**
245
246 .. class:: warningmark
247
248 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
249
250 .. __: http://www.sanger.ac.uk/resources/software/smalt/
251
252 ------
253
254 **Input formats**
255
256 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.
257
258 ------
259
260
261 Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".
262
263 ------
264
265
266 -a Output explicit alignments along with the mapping coordinates.
267
268 -c &#060;mincover INT&#062;
269 Only consider mappings where the k-mer word seeds cover the query read to
270 a minimum extent. If &#060;mincover&#062; is an integer or floating point &#062; 1.0, at
271 least this many bases of the read must be covered by k-mer word seeds. If
272 &#060;mincover&#062; is a floating point &#060;= 1.0, it specifies the fraction of the
273 query read length that must be covered by k-mer word seeds. This option
274 is only valid in conjunction with the '-x' flag.
275
276 -d &#060;scordiff INT&#062;
277 Set a threshold of the Smith-Waterman alignment score relative to the
278 maximum score. When mapping single reads, all alignments are reported
279 that have Smith-Waterman scores within &#060;scorediff&#062; of the maximum.
280 Mappings with lower scores are skipped. If &#060;scorediff&#062; is set to a
281 value &#060; 0, all alignments are printed that have scores above the
282 threshold specified with the '-m &#060;minscor&#062;' option.
283 For paired reads, only a value of 0 is supported. With the option '-d 0'
284 all alignments (pairings) with the best score are output. By default
285 (without the option '-d 0') single reads/mates with multiple best mappings
286 are reported as 'not mapped'.
287
288 -f &#060;ouform STR&#062;
289 Specifies the output format. &#060;ouform&#062; can be either 'sam'(default),
290 'cigar', 'gff' or 'ssaha'. Optional extension 'sam:nohead,x,clip'
291 (see manual). Support for BAM format is dependent on additional
292 libraries (not installed).
293
294 -F &#060;inform STR&#062;
295 Specifies the input format. The only available format is fastq (default).
296 Support for BAM and SAM formats (see: samtools.sourceforge.net) depends
297 on additional libraries (not installed).
298
299 -g &#060;insfil STR&#062;
300 Use the distribution of insert sizes stored in the file &#060;insfil&#062;. This
301 file is in ASCII format and can be generated using the 'sample' task (see
302 'smalt sample -H' for help).
303
304 -H Print these instructions.
305
306 -i &#060;insert_max INT&#062;
307 Maximum insert size (only in paired-end mode). The default is 500.
308
309 -j &#060;insert_min INT&#062;
310 Minimum insert size (only in paired-end mode). The default is 0.
311
312 -l &#060;pairtyp STR&#062;
313 Type of read pair library. &#060;pairtyp&#062; can be either 'pe', i.e. for
314 the Illumina paired-end library for short inserts ( \|&#8212;&#062; &#060;&#8212;\| ). 'mp'
315 for the Illumina mate-pair library for long inserts ( &#060;&#8212;\| \|&#8212;&#062; ) or
316 'pp' for mates sequenced on the same strand ( \|&#8212;&#062; \|&#8212;&#062; ). 'pe' is the
317 default.
318
319 -m &#060;minscor INT&#062;
320 Sets an absolute threshold of the Smith-Waterman scores. Mappings with
321 scores below that threshold will not be reported. The default is
322 &#060;minscor&#062; = &#060;wordlen&#062; + &#060;stepsiz&#062; - 1.
323
324 -n &#060;nthreads INT&#062;
325 Run smalt using multiple threads. &#060;nthreads&#062; is the number of additional
326 threads forked. The order of the reads in the input files is not preserved
327 for the output unless '-O' is also specified.
328
329 -o &#060;oufilnam STR&#062;
330 Write mapping output (e.g. SAM lines) to a separate file. If this option
331 is not specified, mappings are written to standard output.
332
333 -O Output mappings in the order of the reads in the input files when using
334 multiple threads (option '-n &#060;nthreads&#062;').
335
336
337 -p Report partial alignments if they are complementary on the read (split
338 reads).
339
340 -q &#060;minbasq INT&#062;
341 Sets a base quality threshold (0 &#060;= minbasq &#060;= 10, default 0).
342 K-mer words of the read with nucleotides that have a base quality below
343 this threshold are not looked up in the hash index.
344
345 -r &#060;seed INT&#062;
346 If &#060;seed&#062; &#062;= 0 report an alignment selected at random where there are
347 multiple mappings with the same best alignment score. With &#060;seed&#062; = 0
348 (default) a seed is derived from the current calendar time. If &#060;seed&#062;
349 &#060; 0 reads with multiple best mappings are reported as 'not mapped'.
350
351 -S &#060;scorspec STR&#062;
352 Specify alignment penalty scores for a match or mismatch (substitution),
353 or for opening or extending a gap. &#060;scorspec&#062; is a comma-separated
354 list of integer assignments to one or more of the following variables:
355 match, subst, gapopen, gapext, i.e. 'gapopen=-5,gapext=-4' (no spaces
356 allowed in &#060;scorespec&#062;). Default:'match=1,subst=-2,gapopen=-4,gapext=-3'
357
358 -w Smith-Waterman scores are complexity weighted.
359
360 -x This flag triggers a more exhaustive search for alignments at the cost
361 of speed. In paired-end mode each mate is mapped independently.(By
362 default the mate with fewer hits in the hash index is mapped first and
363 the vicinity is searched for mappings of its mate.)
364
365 -y &#060;minid FLT&#062;
366 Sets an identity threshold for a mapping to be reported (default: 0).
367 &#060;minid&#062; specifies the number of exactly matching nucleotides either as
368 a positive integer or as a fraction of the read length (&#060;= 1.0).
369 </help>
370 </tool>