Mercurial > repos > nml > smalt_map
comparison smalt_map.xml @ 0:77cc50d982c0 draft default tip
planemo upload for repository https://sourceforge.net/projects/smalt/ commit 008f4667b70be22e9ddf496738b3f74bb942ed28
author | nml |
---|---|
date | Tue, 19 Sep 2017 16:40:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:77cc50d982c0 |
---|---|
1 <tool id="smalt_map" name="smalt map" version="1.2.0" > | |
2 <description>Map query reads (FASTA/FASTQ) format onto the reference sequences</description> | |
3 <requirements> <!-- Runtime dependency: the smalt package, pinned to version 0.7.6. --> | |
4 <requirement type="package" version="0.7.6">smalt</requirement> | |
5 </requirements> | |
6 <stdio> <!-- Any non-zero exit code, or either pattern below, is treated as a fatal job error. --> | |
7 <exit_code range="1:" level="fatal" description="Unknown error" /> | |
8 <regex match="Command line error" | |
9 source="stdout" | |
10 level="fatal" | |
11 description="SMALT rejected the command line; check the selected parameter values" /> | |
12 <regex match="ERROR" | |
13 source="stderr" | |
14 level="fatal" | |
15 description="SMALT reported an error; see the tool standard error for details" /> | |
16 </stdio> | |
17 <!-- Builds the smalt_map.sh call: index files and output format first, then the number of read files (1 or 2) with the read paths, then the smalt options. Script and dataset paths are single-quoted so a path containing spaces or shell metacharacters stays one argument. --> <command> | |
18 bash '$__tool_directory__/smalt_map.sh' '$smi' '$sma' $oformat.outformat | |
19 | |
20 #if $singlePaired.sPaired == "single": | |
21 1 '$singlePaired.sInput1' | |
22 #elif $singlePaired.sPaired == "paired": | |
23 2 '$singlePaired.pInput1' '$singlePaired.pInput2' | |
24 #elif $singlePaired.sPaired == "collections": | |
25 2 '$singlePaired.fastq_collection.forward' '$singlePaired.fastq_collection.reverse' | |
26 #end if | |
27 | |
28 -o '$output' | |
29 | |
30 #if $oformat.outformat == "sam": | |
31 #if $oformat.samOptions: | |
32 -f "$oformat.outformat:$oformat.samOptions" | |
33 #else | |
34 -f "$oformat.outformat" | |
35 #end if | |
36 #elif $oformat.outformat == "bam": | |
37 #if $oformat.bamOptions: | |
38 -f "$oformat.outformat:$oformat.bamOptions" | |
39 #else | |
40 -f "$oformat.outformat" | |
41 #end if | |
42 #else | |
43 -f "$oformat.outformat" | |
44 #end if | |
45 | |
46 | |
47 | |
48 -n \${GALAXY_SLOTS:-2} | |
49 | |
50 #if $singlePaired.sPaired != "single": | |
51 -l $singlePaired.pairtype | |
52 #end if | |
53 | |
54 | |
55 #if $mincover: | |
56 -c "$mincover" | |
57 #end if | |
58 | |
59 #if $scordiff: | |
60 -d "$scordiff" | |
61 #end if | |
62 | |
63 #if $insfil: | |
64 -g "$insfil" | |
65 #end if | |
66 | |
67 #if $insertmax: | |
68 -i "$insertmax" | |
69 #end if | |
70 | |
71 #if $insertmin: | |
72 -j "$insertmin" | |
73 #end if | |
74 | |
75 #if $minscor: | |
76 -m "$minscor" | |
77 #end if | |
78 | |
79 #if $minbasq: | |
80 -q "$minbasq" | |
81 #end if | |
82 | |
83 #if $seed: | |
84 -r "$seed" | |
85 #end if | |
86 | |
87 #if $sw_weighted: | |
88 -w | |
89 #end if | |
90 | |
91 #if $search_harder: | |
92 -x | |
93 #end if | |
94 | |
95 #if $minid: | |
96 -y "$minid" | |
97 #end if | |
98 | |
99 </command> | |
100 | |
101 | |
102 <inputs> | |
103 <conditional name="singlePaired"> <!-- Selects the read layout: the command template passes one read file for single-end and two for either paired-end choice, and adds -l pairtype for the paired-end choices. --> | |
104 <param name="sPaired" type="select" label="What is the library type?"> | |
105 <option value="single">Single-end</option> | |
106 <option value="paired">Paired-end</option> | |
107 <option value="collections">Paired-end Collections</option> | |
108 </param> | |
109 <when value="single"> | |
110 <param name="sInput1" type="data" format="fastq" label="Single end illumina fastq file" optional="false"/> | |
111 </when> | |
112 <when value="paired"> | |
113 <param name="pInput1" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/> | |
114 <param name="pInput2" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file"/> | |
115 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library"> | |
116 <option value="pe">Illumina paired-end (short inserts)</option> | |
117 <option value="mp">Illumina mate-pair library (long inserts)</option> | |
118 <option value="pp">Mate-pair sequenced on the same strand</option> | |
119 </param> | |
120 </when> | |
121 <when value="collections"> | |
122 <param name="fastq_collection" type="data_collection" label="Paired-end Fastq collection" help="" optional="false" format="fastq,fastqsanger,fastqillumina,fastqsolexa" collection_type="paired" /> | |
123 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library"> | |
124 <option value="pe">Illumina paired-end (short inserts)</option> | |
125 <option value="mp">Illumina mate-pair library (long inserts)</option> | |
126 <option value="pp">Mate-pair sequenced on the same strand</option> | |
127 </param> | |
128 </when> | |
129 | |
130 </conditional> | |
131 | |
132 <param name="smi" type="data" format="binary" label="SMI index" help=""/> <!-- smi and sma are handed to smalt_map.sh as its first two positional arguments. --> | |
133 <param name="sma" type="data" format="binary" label="SMA index" help=""/> | |
134 | |
135 <param name="mincover" type="text" label="Mincover" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent"/> | |
136 <param name="scordiff" type="text" label="Scordiff" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score"/> | |
137 <conditional name="oformat"> <!-- outformat is passed to smalt with -f; for sam and bam, any selected options are appended after a colon. cigar and ssaha take no extra options. --> | |
138 <param name="outformat" type="select" label="Format" help=""> | |
139 <option value="cigar">cigar</option> | |
140 <option value="sam" selected="true">sam</option> | |
141 <option value="ssaha">ssaha</option> | |
142 <option value="bam">bam</option> | |
143 </param> | |
144 <when value="sam"> | |
145 <param name="samOptions" type="select" display="checkboxes" label="Sam Options" multiple="true"> | |
146 <option value="nohead">No Header</option> | |
147 <option value="clip">Hard Clip</option> | |
148 </param> | |
149 </when> | |
150 <when value="bam"> | |
151 <param name="bamOptions" type="select" display="checkboxes" label="Bam Options" multiple="true"> | |
152 <option value="clip">Hard Clip</option> | |
153 </param> | |
154 </when> | |
155 <when value="cigar"> | |
156 </when> | |
157 <when value="ssaha"> | |
158 </when> | |
159 </conditional> | |
160 <param name="insfil" type="data" optional="true" label="Distribution insert sizes " help="Use the distribution of insert sizes stored in this file. The file is in ASCII format and can be generated using the 'sample' task (see 'smalt sample -H' for help)." format="txt"/> | |
161 <param name="insertmax" type="text" label="Maximum insert size (only in paired-end mode). " help="Maximum insert size (only in paired-end mode). The default is 500."/> | |
162 <param name="insertmin" type="text" label="Minimum insert size (only in paired-end mode). " help="Minimum insert size (only in paired-end mode). The default is 0."/> | |
163 <!-- Each parameter in this group maps to one smalt flag (-g, -i, -j, -m, -q, -r, -w, -x, -y) that the command template emits only when the parameter is set. --> | |
164 | |
165 <param name="minscor" type="text" label="Sets an absolute threshold of the Smith-Waterman scores." help="Mappings with scores below that threshold will not be reported. The default is < minscor > = < wordlen > + < stepsiz > - 1"/> | |
166 | |
167 <param name="minbasq" type="text" label="Sets a base quality threshold (0 <= minbasq <= 10, default 0)" help="K-mer words of the read with nucleotides that have a base quality below this threshold are not looked up in the hash index."/> | |
168 | |
169 <param name="seed" type="text" label="If there are multiple mappings with the same best alignment score report one picked at random." help="An integer >= 0 used to seed the pseudo-random generator."/> | |
170 | |
171 <param name="sw_weighted" type="boolean" label="Smith-Waterman scores are complexity weighted."/> | |
172 | |
173 <param name="search_harder" type="boolean" label="This flag triggers a more exhaustive search for alignments at the cost of decreased speed" help="In paired-end mode each mate is mapped independently. (By default the mate with fewer hits in the hash index is mapped first and the vicinity is searched for mappings of its mate.)"/> | |
174 | |
175 <param name="minid" type="text" label="Sets an identity threshold for a mapping to be reported (default: 0)." help="Specifies the number of exactly matching nucleotides either as a positive integer or as a fraction of the read length (at most 1.0)."/> | |
176 </inputs> | |
177 | |
178 <outputs> <!-- The output datatype follows the selected outformat through the change_format rules; format="cigar" on the data element is the declared default. --> | |
179 <data name="output" format="cigar" > | |
180 <change_format> | |
181 <when input="oformat.outformat" value="cigar" format="cigar"/> | |
182 <when input="oformat.outformat" value="sam" format="sam"/> | |
183 <when input="oformat.outformat" value="ssaha" format="ssaha"/> | |
184 <when input="oformat.outformat" value="bam" format="bam"/> | |
185 </change_format> | |
186 </data> | |
187 </outputs> | |
188 <tests> <!-- Single paired-end run against the output.smi/output.sma index files; it only asserts that the SAM header text is present in the output. --> | |
189 <test> | |
190 <param name="sPaired" value="paired"/> | |
191 <param name="pInput1" value="ecoli_1K_1.fq"/> | |
192 <param name="pInput2" value="ecoli_1K_2.fq"/> | |
193 <param name="pairtype" value="pe"/> | |
194 <param name="smi" value="output.smi"/> | |
195 <param name="sma" value="output.sma"/> | |
196 <param name="outformat" value="sam"/> | |
197 <output name="output"> | |
198 <assert_contents> | |
199 <has_text text="@HD VN:1.3 SO:unknown" /> | |
200 </assert_contents> | |
201 </output> | |
202 </test> | |
203 </tests> | |
204 <help> | |
205 | |
206 **What it does** | |
207 | |
208 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads. | |
209 | |
210 | |
211 ------ | |
212 | |
213 | |
214 **Know what you are doing** | |
215 | |
216 .. class:: warningmark | |
217 | |
218 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. | |
219 | |
220 .. __: http://www.sanger.ac.uk/resources/software/smalt/ | |
221 | |
222 ------ | |
223 | |
224 **Input formats** | |
225 | |
226 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files. | |
227 | |
228 ------ | |
229 | |
230 | |
231 Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/". | |
232 | |
233 ------ | |
234 | |
235 | |
236 -a Output explicit alignments along with the mapping coordinates. | |
237 | |
238 -c <mincover INT> | |
239 Only consider mappings where the k-mer word seeds cover the query read to | |
240 a minimum extent. If <mincover> is an integer or floating point > 1.0, at | |
241 least this many bases of the read must be covered by k-mer word seeds. If | |
242 <mincover> is a floating point <= 1.0, it specifies the fraction of the | |
243 query read length that must be covered by k-mer word seeds. This option | |
244 is only valid in conjunction with the '-x' flag. | |
245 | |
246 -d <scordiff INT> | |
247 Set a threshold of the Smith-Waterman alignment score relative to the | |
248 maximum score. When mapping single reads, all alignments are reported | |
249 that have Smith-Waterman scores within <scorediff> of the maximum. | |
250 Mappings with lower scores are skipped. If <scorediff> is set to a | |
251 value < 0, all alignments are printed that have scores above the | |
252 threshold specified with the '-m <minscor>' option. | |
253 For paired reads, only a value of 0 is supported. With the option '-d 0' | |
254 all alignments (pairings) with the best score are output. By default | |
255 (without the option '-d 0') single reads/mates with multiple best mappings | |
256 are reported as 'not mapped'. | |
257 | |
258 -f <ouform STR> | |
259 Specifies the output format. <ouform> can be either 'sam'(default), | |
260 'cigar', 'gff' or 'ssaha'. Optional extension 'sam:nohead,x,clip' | |
261 (see manual). Support for BAM format is dependent on additional | |
262 libraries (not installed). | |
263 | |
264 -F <inform STR> | |
265 Specifies the input format. The only available format is fastq (default). | |
266 Support for BAM and SAM formats (see: samtools.sourceforge.net) depends | |
267 on additional libraries (not installed). | |
268 | |
269 -g <insfil STR> | |
270 Use the distribution of insert sizes stored in the file <insfil>. This | |
271 file is in ASCII format and can be generated using the 'sample' task (see | |
272 'smalt sample -H' for help). | |
273 | |
274 -H Print these instructions. | |
275 | |
276 -i <insert_max INT> | |
277 Maximum insert size (only in paired-end mode). The default is 500. | |
278 | |
279 -j <insert_min INT> | |
280 Minimum insert size (only in paired-end mode). The default is 0. | |
281 | |
282 -l <pairtyp STR> | |
283 Type of read pair library. <pairtyp> can be either 'pe', i.e. for | |
284 the Illumina paired-end library for short inserts ( \|—> <—\| ). 'mp' | |
285 for the Illumina mate-pair library for long inserts ( <—\| \|—> ) or | |
286 'pp' for mates sequenced on the same strand ( \|—> \|—> ). 'pe' is the | |
287 default. | |
288 | |
289 -m <minscor INT> | |
290 Sets an absolute threshold of the Smith-Waterman scores. Mappings with | |
291 scores below that threshold will not be reported. The default is | |
292 <minscor> = <wordlen> + <stepsiz> - 1. | |
293 | |
294 -n <nthreads INT> | |
295 Run smalt using multiple threads. <nthreads> is the number of additional | |
296 threads forked. The order of the reads in the input files is not preserved | |
297 for the output unless '-O' is also specified. | |
298 | |
299 -o <oufilnam STR> | |
300 Write mapping output (e.g. SAM lines) to a separate file. If this option | |
301 is not specified, mappings are written to standard output. | |
302 | |
303 -O Output mappings in the order of the reads in the input files when using | |
304 multiple threads (option '-n <nthreads>'). | |
305 | |
306 | |
307 -p Report partial alignments if they are complementary on the read (split | |
308 reads). | |
309 | |
310 -q <minbasq INT> | |
311 Sets a base quality threshold (0 <= minbasq <= 10, default 0). | |
312 K-mer words of the read with nucleotides that have a base quality below | |
313 this threshold are not looked up in the hash index. | |
314 | |
315 -r <seed INT> | |
316 If <seed> >= 0 report an alignment selected at random where there are | |
317 multiple mappings with the same best alignment score. With <seed> = 0 | |
318 (default) a seed is derived from the current calendar time. If <seed> | |
319 < 0 reads with multiple best mappings are reported as 'not mapped'. | |
320 | |
321 -S <scorspec STR> | |
322 Specify alignment penalty scores for a match or mismatch (substitution), | |
323 or for opening or extending a gap. <scorspec> is a comma separated | |
324 list of integer assignments to one or more of the following variables: | |
325 match, subst, gapopen, gapext, i.e. 'gapopen=-5,gapext=-4' (no spaces | |
326 allowed in <scorespec>). Default:'match=1,subst=-2,gapopen=-4,gapext=-3' | |
327 | |
328 -w Smith-Waterman scores are complexity weighted. | |
329 | |
330 -x This flag triggers a more exhaustive search for alignments at the cost | |
331 of speed. In paired-end mode each mate is mapped independently. (By | |
332 default the mate with fewer hits in the hash index is mapped first and | |
333 the vicinity is searched for mappings of its mate.) | |
334 | |
335 -y <minid FLT> | |
336 Sets an identity threshold for a mapping to be reported (default: 0). | |
337 <minid> specifies the number of exactly matching nucleotides either as | |
338 a positive integer or as a fraction of the read length (<= 1.0). | |
339 </help> | |
340 </tool> |