9
|
1 <tool id="smalt_wrapper (docker)" name="SMALT" version="0.0.3">
|
|
<requirements>
    <!-- Docker image declared as the container for this tool. -->
    <container type="docker">apetkau/smalt-galaxy</container>
</requirements>
|
|
5 <description>maps query reads onto the reference sequences</description>
|
|
<command>
    ## Cheetah template that assembles the smalt_wrapper.py command line.
    ## Every substituted value is double-quoted so that dataset paths, the
    ## free-text pairTyp value and a dbkey of "?" reach the wrapper as a
    ## single, unglobbed shell word.
    smalt_wrapper.py
        ## use the slots allocated to the job; fall back to the former fixed value of 4
        --threads="\${GALAXY_SLOTS:-4}"

        ## reference source
        --fileSource="${genomeSource.refGenomeSource}"
        #if $genomeSource.refGenomeSource == "history":
            ## build index on the fly
            --ref="${genomeSource.ownFile}"
            --dbkey="${dbkey}"
        #else:
            ## use precomputed indexes
            --ref="${genomeSource.indices.fields.path}"
            --do_not_build_index
        #end if

        ## input file(s)
        --input1="${paired.input1}"
        #if $paired.sPaired == "paired":
            --input2="${paired.input2}"
        #end if

        ## output file
        --output="${output}"

        ## run parameters
        --genAlignType="${paired.sPaired}"
        --params="${params.source_select}"
        #if $params.source_select != "pre_set":
            --scorDiff="${params.scorDiff}"
            ## insert size and library type are only passed in paired-end mode
            #if $paired.sPaired == "paired":
                --insertMax="${params.insertMax}"
                --insertMin="${params.insertMin}"
                --pairTyp="${params.pairTyp}"
            #end if
            --minScor="${params.minScor}"
            --partialAlignments="${params.partialAlignments}"
            --minBasq="${params.minBasq}"
            --seed="${params.seed}"
            --complexityWeighted="${params.complexityWeighted}"
            --exhaustiveSearch="${params.cExhaustiveSearch.exhaustiveSearch}"
            ## minCover is only passed together with the exhaustive search flag
            #if str($params.cExhaustiveSearch.exhaustiveSearch) == "true":
                --minCover="${params.cExhaustiveSearch.minCover}"
            #end if
            --minId="${params.minId}"
        #end if

        ## suppress output SAM header
        --suppressHeader="${suppressHeader}"
</command>
|
|
<inputs>
    <!-- Reference: a built-in index from the smalt_indexes data table, or a FASTA dataset from the history. -->
    <conditional name="genomeSource">
        <param name="refGenomeSource" type="select"
               label="Will you select a reference genome from your history or use a built-in index?">
            <option value="indexed">Use a built-in index</option>
            <option value="history">Use one from the history</option>
        </param>
        <when value="indexed">
            <param name="indices" type="select" label="Select a reference genome">
                <options from_data_table="smalt_indexes">
                    <filter type="sort_by" column="2" />
                    <validator type="no_options" message="No indexes are available" />
                </options>
            </param>
        </when>
        <when value="history">
            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey"
                   label="Select a reference from history" />
        </when>
    </conditional>

    <!-- Reads: one FASTQ dataset for single-end, two for paired-end. -->
    <conditional name="paired">
        <param name="sPaired" type="select" label="Is this library mate-paired?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single">
            <param name="input1" type="data" format="fastqsanger" label="FASTQ file"
                   help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
        </when>
        <when value="paired">
            <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file"
                   help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
            <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file"
                   help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
        </when>
    </conditional>

    <!-- Mapping parameters: "pre_set" exposes nothing, "full" exposes every option below. -->
    <conditional name="params">
        <param name="source_select" type="select" label="Smalt settings to use"
               help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
            <option value="pre_set">Commonly Used</option>
            <option value="full">Full Parameter List</option>
        </param>
        <when value="pre_set" />
        <when value="full">
            <!-- The when values must equal the boolean's truevalue/falsevalue ("true"/"false")
                 so that both the checked and the unchecked state have a matching case. -->
            <conditional name="cExhaustiveSearch">
                <param name="exhaustiveSearch" type="boolean" truevalue="true" falsevalue="false" checked="false"
                       label="Do exhaustive search? (map -x)"
                       help="This flag triggers a more exhaustive search for alignments at the cost of decreased speed." />
                <when value="true">
                    <param name="minCover" type="float" value="0" label="Minimum cover (map -c)"
                           help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent." />
                </when>
                <when value="false" />
            </conditional>
            <param name="scorDiff" type="integer" value="0" label="Score diff (map -d)"
                   help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score." />
            <param name="insertMax" type="integer" value="500" label="Maximum insert size (map -i)"
                   help="Only in paired-end mode." />
            <param name="insertMin" type="integer" value="0" label="Minimum insert size (map -j)"
                   help="Only in paired-end mode." />
            <param name="pairTyp" type="text" size="2" value="pe" label="Type of read pair library (map -l)"
                   help="Can be either 'pe', 'mp' or 'pp'." />
            <param name="minScor" type="integer" value="0" label="Minimum score (map -m)"
                   help="Sets an absolute threshold of the Smith-Waterman scores." />
            <param name="partialAlignments" type="boolean" truevalue="true" falsevalue="false" checked="false"
                   label="Partial alignments (map -p)"
                   help="Report partial alignments if they are complementary on the read (split reads)." />
            <!-- "<" is not allowed literally inside an attribute value, hence the &lt; escapes. -->
            <param name="minBasq" type="integer" value="0" label="Base quality threshold (map -q)"
                   help="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)." />
            <param name="seed" type="integer" value="0" label="Seed (map -r)" help="See below." />
            <param name="complexityWeighted" type="boolean" truevalue="true" falsevalue="false" checked="false"
                   label="Complexity weighted (map -w)"
                   help="Smith-Waterman scores are complexity weighted." />
            <param name="minId" type="float" value="0" label="Identity threshold (map -y)"
                   help="Sets an identity threshold for a mapping to be reported." />
        </when>
    </conditional>

    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="false"
           label="Suppress the header in the output SAM file"
           help="Smalt produces SAM with several lines of header information" />
</inputs>
|
|
<outputs>
    <!-- One SAM dataset; its dbkey metadata is set according to the chosen reference source. -->
    <data name="output" format="sam" label="${tool.name} on ${on_string}: mapped reads">
        <actions>
            <conditional name="genomeSource.refGenomeSource">
                <!-- Built-in index: take column 1 of the smalt_indexes row selected by the filters. -->
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="smalt_indexes" column="1">
                            <!-- discard rows whose column 0 starts with "#" -->
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False" />
                            <!-- match column 0 against the value of genomeSource.indices -->
                            <filter type="param_value" ref="genomeSource.indices" column="0" />
                        </option>
                    </action>
                </when>
                <!-- History reference: copy the dbkey attribute of the ownFile dataset. -->
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>
</outputs>
|
|
136 <help>
|
|
137
|
|
138 **What it does**
|
|
139
|
|
140 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.
|
|
141
|
|
142 ------
|
|
143
|
|
144 Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".
|
|
145
|
|
146 ------
|
|
147
|
|
148 **Know what you are doing**
|
|
149
|
|
150 .. class:: warningmark
|
|
151
|
|
152 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
|
|
153
|
|
154 .. __: http://www.sanger.ac.uk/resources/software/smalt/
|
|
155
|
|
156 ------
|
|
157
|
|
158 **Input formats**
|
|
159
|
|
160 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.
|
|
161
|
|
162 ------
|
|
163
|
|
164 **A Note on Built-in Reference Genomes**
|
|
165
|
|
166 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
|
|
167
|
|
168 ------
|
|
169
|
|
170 **Outputs**
|
|
171
|
|
172 The output is in SAM format.
|
|
173
|
|
174 -------
|
|
175
|
|
176 **SMALT parameter list**
|
|
177
|
|
178 This is an exhaustive list of SMALT options:
|
|
179
|
|
180 For **map**::
|
|
181
|
|
182 -a
|
|
183 Output explicit alignments along with the mappings.
|
|
184
|
|
185 -c <mincover>
|
|
186 Only consider mappings where the k-mer word seeds cover the query read to
|
|
187 a minimum extent. If <mincover> is an integer or floating point > 1.0, at
|
|
188 least this many bases of the read must be covered by k-mer word seeds. If
|
|
189 <mincover> is a floating point <= 1.0, it specifies the fraction of the
|
|
190 query read length that must be covered by k-mer word seeds. This option
|
|
191 is only valid in conjunction with the '-x' flag.
|
|
192
|
|
193 -d <scordiff>
|
|
194 Set a threshold of the Smith-Waterman alignment score relative to the
|
|
195 maximum score. When mapping single reads, all alignments are reported
|
|
196 that have Smith-Waterman scores within <scordiff> of the maximum.
|
|
197 Mappings with lower scores are skipped. If <scordiff> is set to a
|
|
198 value < 0, all alignments are printed that have scores above the
|
|
199 threshold specified with the '-m <minscor>' option.
|
|
200 For paired reads, only a value of 0 is supported. With the option '-d 0'
|
|
201 all alignments (pairings) with the best score are output. By default
|
|
202 (without the option '-d 0') single reads/mates with multiple best mappings
|
|
203 are reported as 'not mapped'.
|
|
204
|
|
205 -f <format>
|
|
206 Specifies the output format. <format> can be either 'bam', 'cigar', 'gff',
|
|
207 'sam' (default), 'samsoft' or 'ssaha'. Optional extension 'sam:nohead,clip'
|
|
208 (see manual)
|
|
209
|
|
210 -F <inform>
|
|
211 Specifies the input format. <inform> can be either 'fastq' (default),
|
|
212 'sam' or 'bam' (see: samtools.sourceforge.net). SAM and BAM formats
|
|
213 require additional libraries to be installed.
|
|
214
|
|
215 -g <insfil>
|
|
216 Use the distribution of insert sizes stored in the file <insfil>. This
|
|
217 file is in ASCII format and can be generated using the 'sample' task (see
|
|
218 'smalt sample -H' for help).
|
|
219
|
|
220 -H
|
|
221 Print these instructions.
|
|
222
|
|
223 -i <insertmax>
|
|
224 Maximum insert size (only in paired-end mode). The default is 500.
|
|
225
|
|
226 -j <insertmin>
|
|
227 Minimum insert size (only in paired-end mode). The default is 0.
|
|
228
|
|
229 -l <pairtyp>
|
|
230 Type of read pair library. <pairtyp> can be either 'pe', i.e. for
|
|
231 the Illumina paired-end library for short inserts (|--> <--|). 'mp'
|
|
232 for the Illumina mate-pair library for long inserts (<--| |-->) or
|
|
233 'pp' for mates sequenced on the same strand (|--> |-->). 'pe' is the
|
|
234 default.
|
|
235
|
|
236 -m <minscor>
|
|
237 Sets an absolute threshold of the Smith-Waterman scores. Mappings with
|
|
238 scores below that threshold will not be reported. The default is
|
|
239 <minscor> = <wordlen> + <stepsiz> - 1
|
|
240
|
|
241 -n <nthreads>
|
|
242 Run smalt using multiple threads. <nthreads> is the number of additional
|
|
243 threads forked from the main thread. The order of the reads in the
|
|
244 input files is not preserved for the output unless '-O' is also specified.
|
|
245
|
|
246 -o <oufilnam>
|
|
247 Write mapping output (e.g. SAM lines) to a separate file. If this option
|
|
248 is not specified, mappings are written to standard output together with
|
|
249 other messages.
|
|
250
|
|
251 -O
|
|
252 Output mappings in the order of the reads in the input files when using
|
|
253 multiple threads (option '-n <nthreads>').
|
|
254
|
|
255 -p
|
|
256 Report partial alignments if they are complementary on the read (split
|
|
257 reads).
|
|
258
|
|
259 -q <minbasq>
|
|
260 Sets a base quality threshold (0 <= minbasq <= 10, default 0).
|
|
261 K-mer words of the read with nucleotides that have a base quality below
|
|
262 this threshold are not looked up in the hash index.
|
|
263
|
|
264 -r <seed>
|
|
265 If <seed> >= 0 report an alignment selected at random where there are
|
|
266 multiple mappings with the same best alignment score. With <seed> = 0
|
|
267 (default) a seed is derived from the current calendar time. If <seed>
|
|
268 < 0 reads with multiple best mappings are reported as 'not mapped'.
|
|
269
|
|
270 -T <tmp_dir>
|
|
271 Write temporary files to directory <tmp_dir> (used with input files in
|
|
272 SAM/BAM format).
|
|
273
|
|
274 -w
|
|
275 Smith-Waterman scores are complexity weighted.
|
|
276
|
|
277 -x
|
|
278 This flag triggers a more exhaustive search for alignments at the cost
|
|
279 of decreased speed. In paired-end mode each mate is mapped independently.
|
|
280 (By default the mate with fewer hits in the hash index is mapped first
|
|
281 and the vicinity is searched for mappings of its mate.)
|
|
282
|
|
283 -y <minid>
|
|
284 Sets an identity threshold for a mapping to be reported (default: 0).
|
|
285 <minid> specifies the number of exactly matching nucleotides either as
|
|
286 a positive integer or as a fraction of the read length (<= 1.0).
|
|
287
|
|
288 </help>
|
|
289 </tool>
|
|
290
|
|
291
|