comparison gsnap.xml @ 11:6adc485b6dc0 draft default tip

Uploaded
author jjohnson
date Tue, 31 Jul 2012 08:19:46 -0400
parents 93911bac43da
children
comparison
equal deleted inserted replaced
10:93911bac43da 11:6adc485b6dc0
1 <tool id="gsnap" name="GSNAP" version="2.0.1">
2 <description>Genomic Short-read Nucleotide Alignment Program</description>
3 <requirements>
4 <requirement type="binary">gsnap</requirement>
5 </requirements>
6 <version_string>gsnap --version</version_string>
7 <command>
8 #import os.path, re
9 gsnap
10 --nthreads="4" --ordered
11 #if $refGenomeSource.genomeSource == "gmapdb":
12 ## History gmapdb: --db comes from the dataset's db_name metadata, so the files directory is not listed here.
13 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
14 #else:
15 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
16 #end if
17 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
18 --kmer=$refGenomeSource.kmer
19 #end if
20 #if $refGenomeSource.use_splicing.src == 'gmapdb':
21 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
22 -s $refGenomeSource.use_splicing.splicemap.value
23 #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
24 $refGenomeSource.use_splicing.ambig_splice_noclip ## declared under use_splicing; trim_mismatch_score exists only with advanced computation options
25 #end if
26 #end if
27 #elif $refGenomeSource.use_splicing.src == 'history':
28 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
29 -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
30 #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
31 $refGenomeSource.use_splicing.ambig_splice_noclip ## declared under use_splicing; trim_mismatch_score exists only with advanced computation options
32 #end if
33 #end if
34 #end if
35 #if $refGenomeSource.use_snps.src == 'gmapdb':
36 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
37 -v $refGenomeSource.use_snps.snpindex.value
38 #end if
39 #elif $refGenomeSource.use_snps.src == 'history':
40 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
41 -V $refGenomeSource.use_snps.snpindex.extra_files_path -v $refGenomeSource.use_snps.snpindex.metadata.snps_name
42 #end if
43 #end if
44 #if $refGenomeSource.mode.__str__ != '':
45 --mode=$refGenomeSource.mode
46 #end if
47 #* ## No longer in options as of version 2011-11-30
48 #if $mapq_unique_score.__str__ != '':
49 --mapq-unique-score=$mapq_unique_score
50 #end if
51 *#
52 #if $computation.options == "advanced":
53 #if $computation.max_mismatches.__str__ != '':
54 --max-mismatches=$computation.max_mismatches
55 #end if
56 $computation.query_unk_mismatch
57 $computation.genome_unk_mismatch
58 #if $computation.terminal_threshold.__str__ != '':
59 --terminal-threshold=$computation.terminal_threshold
60 #end if
61 #if $computation.indel_penalty.__str__ != '':
62 --indel-penalty=$computation.indel_penalty
63 #end if
64 #if $computation.indel_endlength.__str__ != '':
65 --indel-endlength=$computation.indel_endlength
66 #end if
67 #if $computation.max_middle_insertions.__str__ != '':
68 --max-middle-insertions=$computation.max_middle_insertions
69 #end if
70 #if $computation.max_middle_deletions.__str__ != '':
71 --max-middle-deletions=$computation.max_middle_deletions
72 #end if
73 #if $computation.max_end_insertions.__str__ != '':
74 --max-end-insertions=$computation.max_end_insertions
75 #end if
76 #if $computation.max_end_deletions.__str__ != '':
77 --max-end-deletions=$computation.max_end_deletions
78 #end if
79 #if $computation.suboptimal_levels.__str__ != '':
80 --suboptimal-levels=$computation.suboptimal_levels
81 #end if
82 #if $computation.adapter_strip.__str__ != '':
83 --adapter-strip=$computation.adapter_strip
84 #end if
85 #if $computation.trim_mismatch_score.__str__ != '':
86 --trim-mismatch-score=$computation.trim_mismatch_score
87 #end if
88 #if $computation.trim_indel_score.__str__ != '':
89 --trim-indel-score=$computation.trim_indel_score
90 #end if
91 ## TODO - do we need these options (Is it tally XOR runlength?):
92 ## --tallydir= --use-tally=tally
93 ## --runlengthdir --use-runlength=runlength
94 #if $computation.use_tally.__str__ != '' and $computation.use_tally.__str__ != 'None':
95 ## Skipped when no tally dataset is selected (rendered as '' or 'None'); the dataset path is passed directly, so --tallydir is not needed.
96 --use-tally=$computation.use_tally
97 #end if
98 ## gmap options: gmap_mode is a multi-select, so it is converted with str() before the regex tests that gate the per-mode limits
99 #if $computation.gmap_mode.__str__ != '' and $computation.gmap_mode.__str__ != 'None':
100 --gmap-mode='$computation.gmap_mode'
101 #end if
102 #if $computation.trigger_score_for_gmap.__str__ != '':
103 --trigger-score-for-gmap=$computation.trigger_score_for_gmap
104 #end if
105 #if $computation.max_gmap_pairsearch.__str__ != '' and $re.search("pairsearch", str($computation.gmap_mode)):
106 --max-gmap-pairsearch=$computation.max_gmap_pairsearch
107 #end if
108 #if $computation.max_gmap_terminal.__str__ != '' and $re.search("terminal", str($computation.gmap_mode)):
109 --max-gmap-terminal=$computation.max_gmap_terminal
110 #end if
111 #if $computation.max_gmap_improvement.__str__ != '' and $re.search("improv", str($computation.gmap_mode)):
112 --max-gmap-improvement=$computation.max_gmap_improvement
113 #end if
114 #if $computation.microexon_spliceprob.__str__ != '':
115 --microexon-spliceprob=$computation.microexon_spliceprob
116 #end if
117 #end if
118 #if $splicing.options == "advanced":
119 $splicing.novelsplicing
120 #if $splicing.localsplicedist.__str__ != '':
121 --localsplicedist=$splicing.localsplicedist
122 #end if
123 #if $splicing.local_splice_penalty.__str__ != '':
124 --local-splice-penalty=$splicing.local_splice_penalty
125 #end if
126 #if $splicing.distant_splice_penalty.__str__ != '':
127 --distant-splice-penalty=$splicing.distant_splice_penalty
128 #end if
129 #if $splicing.shortend_splice_endlength.__str__ != '':
130 --shortend-splice-endlength=$splicing.shortend_splice_endlength ## matches the shortend_splice_endlength input; no local_splice_endlength input is declared
131 #end if
132 #if $splicing.distant_splice_endlength.__str__ != '':
133 --distant-splice-endlength=$splicing.distant_splice_endlength
134 #end if
135 #if $splicing.distant_splice_identity.__str__ != '':
136 --distant-splice-identity=$splicing.distant_splice_identity
137 #end if
138 #end if
139 #if $output.options == "advanced":
140 #if $output.npath.__str__ != '':
141 --npath=$output.npath
142 #end if
143 $output.quiet_if_excessive
144 $output.show_refdiff
145 $output.clip_overlap
146 #end if
147 #if $result.format == "sam":
148 --format=sam
149 $result.no_sam_headers
150 #if $result.read_group_id.__str__.strip != '':
151 --read-group-id='$result.read_group_id'
152 #end if
153 #if $result.read_group_name.__str__ != '':
154 --read-group-name='$result.read_group_name'
155 #end if
156 #if $result.read_group_library.__str__ != '':
157 --read-group-library='$result.read_group_library'
158 #end if
159 #if $result.read_group_platform.__str__ != '':
160 --read-group-platform='$result.read_group_platform'
161 #end if
162 #if $result.quality_shift.__str__ != '':
163 --quality-shift=$result.quality_shift
164 #end if
165 #elif $result.format == "goby":
166 #if $result.goby_output.__str__ != '':
167 --goby-output='$result.goby_output'
168 #end if
169 #if $result.creads_window_start.__str__ != '':
170 --creads-window-start=$result.creads_window_start
171 #end if
172 #if $result.creads_window_end.__str__ != '':
173 --creads-window-end=$result.creads_window_end
174 #end if
175 $result.creads_complement
176 #end if
177 #if $results.split_output == 'yes':
178 --split-output=gsnap_out
179 #if $results.fails.choice == 'nofails':
180 --nofails
181 #elif $results.fails.choice == 'failsonly':
182 --failsonly
183 #end if
184 $results.fails_as_input
185 #else
186 #if $results.fails.choice == 'nofails':
187 --nofails
188 #elif $results.fails.choice == 'failsonly':
189 --failsonly
190 $results.fails.fails_as_input
191 #end if
192 #end if
193 #if $seq.format == "gsnap_fasta":
194 $seq.circularinput $seq.gsnap
195 #else if $seq.format == "fastq":
196 #if $seq.barcode_length.__str__ != '':
197 --barcode-length=$seq.barcode_length
198 #end if
199 #if $seq.fastq_id_start.__str__ != '':
200 --fastq-id-start=$seq.fastq_id_start
201 #end if
202 #if $seq.fastq_id_end.__str__ != '':
203 --fastq-id-end=$seq.fastq_id_end
204 #end if
205 #if $seq.filter_chastity.__str__ != 'off':
206 --filter-chastity=$seq.filter_chastity
207 #end if
208 #if $seq.paired.ispaired.__str__ == 'yes':
209 #if $seq.paired.pairmax_dna.__str__ != '':
210 --pairmax-dna=$seq.paired.pairmax_dna
211 #end if
212 #if $seq.paired.pairmax_rna.__str__ != '':
213 --pairmax-rna=$seq.paired.pairmax_rna
214 #end if
215 #if $seq.paired.pairexpect.__str__ != '':
216 --pairexpect=$seq.paired.pairexpect
217 #end if
218 #if $seq.paired.pairdev.__str__ != '':
219 --pairdev=$seq.paired.pairdev
220 #end if
221 --orientation=$seq.paired.orientation $seq.fastq $seq.paired.fastq ## selected orientation (FR, RF or FF), then the forward and reverse read files
222 #else
223 $seq.fastq
224 #end if
225 #end if
226 #if $results.split_output == 'yes':
227 2> $gsnap_stderr
228 #else:
229 #if $results.fails.choice.__str__ == 'failsonly' and $results.fails.fails_as_input.__str__ != '':
230 2> $gsnap_stderr > $gsnap_fq
231 #else
232 2> $gsnap_stderr > $gsnap_out
233 #end if
234 #end if
235
236 </command>
237 <inputs>
238 <!-- Input data -->
239 <conditional name="seq">
240 <param name="format" type="select" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select the input format" help="">
241 <option value="fastq">Fastq</option>
242 <!--
243 <option value="goby">Goby compact-reads</option>
244 -->
245 <option value="gsnap_fasta">GSNAP fasta</option>
246 </param>
247 <when value="fastq">
248 <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" />
249 <conditional name="paired">
250 <param name="ispaired" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use Paired Reads?"/>
251 <when value="no"/>
252 <when value="yes">
253 <param name="fastq" type="data" format="fastq" label="Select the paired reads reverse dataset" />
254 <param name="orientation" type="select" label="Orientation of paired-end reads" help="">
255 <option value="FR">fwd-rev, typical Illumina default</option>
256 <option value="RF">rev-fwd, for circularized inserts</option>
257 <option value="FF">fwd-fwd, same strand</option>
258 </param>
259 <param name="pairmax_dna" type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
260 <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided. Should probably match the value for localsplicedist."/>
261 <param name="pairexpect" type="integer" value="" optional="true" label="Expected paired-end length"
262 help="Used for calling splices in medial part of paired-end reads (default 200)"/>
263 <param name="pairdev" type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length"
264 help="Used for calling splices in medial part of paired-end reads (default 25)"/>
265 </when>
266 </conditional>
267 <param name="barcode_length" type="integer" value="" optional="true" label="Amount of barcode to remove from start of read (default 0)" />
268 <param name="fastq_id_start" type="integer" value="" optional="true" label="Starting field of identifier in FASTQ header, whitespace-delimited, starting from 1" />
269 <param name="fastq_id_end" type="integer" value="" optional="true" label="Ending field of identifier in FASTQ header, whitespace-delimited, starting from 1"
270 help="Examples:
271 &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1
272 &lt;br&gt; . start=1, end=1 (default) => identifier is HWUSI-EAS100R:6:73:941:1973#0/1
273 &lt;br&gt;@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
274 &lt;br&gt; . start=1, end=1 => identifier is SRR001666.1
275 &lt;br&gt; . start=2, end=2 => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
276 &lt;br&gt; . start=1, end=2 => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345"
277 />
278 <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program"
279 help="String after the accession having a 'Y' after the first colon, like this:
280 &lt;br&gt;@accession 1:Y:0:CTTGTA
281 &lt;br&gt;where the 'Y' signifies filtering by chastity.
282 &lt;br&gt; For 'either', a 'Y' on either end of a paired-end read will be filtered.
283 &lt;br&gt; For 'both', a 'Y' is required on both ends of a paired-end read (or on the only end of a single-end read)"
284 >
285 <option value="off">off - no filtering</option>
286 <option value="either">either - a 'Y' on either end of a paired-end read</option>
287 <option value="both">both - a 'Y' is required on both ends of a paired-end read or the only end of a single-end read</option>
288 </param>
289 </when>
290 <!--
291 <when value="goby">
292 </when>
293 -->
294 <when value="gsnap_fasta">
295 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
296 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
297 </when>
298
299 </conditional>
300 <!-- No longer in options as of version 2011-11-30
301 <param name="mapq_unique_score" type="integer" value="" optional="true" label="MAPQ score threshold"
302 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
303 (if not selected, then reports all multiple results, up to npaths)" />
304 -->
305
306 <!-- GMAPDB for alignment -->
307 <conditional name="refGenomeSource">
308 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Align To&lt;/H2&gt;Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
309 <option value="indexed">Use a built-in index</option>
310 <option value="gmapdb">Use a gmapdb from your history</option>
311 </param>
312 <when value="indexed">
313 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
314 <options from_file="gmap_indices.loc">
315 <column name="uid" index="0" />
316 <column name="dbkey" index="1" />
317 <column name="name" index="2" />
318 <column name="kmers" index="3" />
319 <column name="maps" index="4" />
320 <column name="snps" index="5" />
321 <column name="value" index="6" />
322 </options>
323 </param>
324
325 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
326 <options from_file="gmap_indices.loc">
327 <column name="name" index="3"/>
328 <column name="value" index="3"/>
329 <filter type="param_value" ref="gmapindex" column="6"/>
330 <filter type="multiple_splitter" column="3" separator=","/>
331 <filter type="add_value" name="" value=""/>
332 <filter type="sort_by" column="3"/>
333 </options>
334 </param>
335
336 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap database.">
337 <option value="">standard</option>
338 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
339 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
340 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
341 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
342 </param>
343
344 <conditional name="use_splicing">
345 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
346 help="Look for splicing involving known sites or known introns at short or long distances
347 See README instructions for the distinction between known sites and known introns">
348 <option value="none" selected="true">None</option>
349 <option value="gmapdb">From the GMAP Database</option>
350 <option value="history">A Map in your history</option>
351 </param>
352 <when value="none"/>
353 <when value="history">
354 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
355 help="built with GMAP IIT"/>
356 </when>
357 <when value="gmapdb">
358 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
359 <options from_file="gmap_indices.loc">
360 <column name="name" index="4"/>
361 <column name="value" index="4"/>
362 <filter type="param_value" ref="gmapindex" column="6"/>
363 <filter type="multiple_splitter" column="4" separator=","/>
364 <filter type="add_value" name="" value=""/>
365 <filter type="sort_by" column="4"/>
366 </options>
367 </param>
368 </when>
369 </conditional>
370
371 <conditional name="use_snps">
372 <param name="src" type="select" label="&lt;HR&gt;Known SNPs" help="for SNP tolerant alignments">
373 <option value="none" selected="true">None</option>
374 <option value="gmapdb">From the GMAP Database</option>
375 <option value="history">A SNP Index in your history</option>
376 </param>
377 <when value="none"/>
378 <when value="history">
379 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
380 help="built with GMAP SNP Index"/>
381 </when>
382 <when value="gmapdb">
383 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
384 <options from_file="gmap_indices.loc">
385 <column name="name" index="5"/>
386 <column name="value" index="5"/>
387 <filter type="param_value" ref="gmapindex" column="6"/>
388 <filter type="multiple_splitter" column="5" separator=","/>
389 <filter type="add_value" name="" value=""/>
390 <filter type="sort_by" column="5"/>
391 </options>
392 </param>
393 </when>
394 </conditional>
395
396 </when>
397 <when value="gmapdb">
398 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
399 help="A GMAP database built with GMAP Build"/>
400 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
401 <options>
402 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
403 </options>
404 </param>
405
406 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap database.">
407 <option value="">standard</option>
408 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
409 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
410 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
411 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
412 </param>
413
414 <conditional name="use_splicing">
415 <param name="src" type="select" label="&lt;HR&gt;Known Splicesite and Introns"
416 help="Look for splicing involving known sites or known introns at short or long distances
417 See README instructions for the distinction between known sites and known introns">
418 <option value="none" selected="true">None</option>
419 <option value="gmapdb">From the GMAP Database</option>
420 <option value="history">A Map in your history</option>
421 </param>
422 <when value="none"/>
423 <when value="history">
424 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
425 help="built with GMAP IIT"/>
426 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
427 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
428 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
429 </when>
430 <when value="gmapdb">
431 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
432 <options>
433 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
434 </options>
435 </param>
436 <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
437 help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.
438 This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
439 </when>
440 </conditional>
441
442 <conditional name="use_snps">
443 <param name="src" type="select" label="&lt;HR&gt;Known SNPs" help="for SNP tolerant alignments">
444 <option value="none" selected="true">None</option>
445 <option value="gmapdb">From the GMAP Database</option>
446 <option value="history">A SNP Index in your history</option>
447 </param>
448 <when value="none"/>
449 <when value="history">
450 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
451 help="built with GMAP SNP Index"/>
452 </when>
453 <when value="gmapdb">
454 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help="">
455 <options>
456 <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
457 </options>
458 </param>
459 </when>
460 </conditional>
461
462 </when>
463 </conditional>
464
465 <!-- Computation options -->
466 <conditional name="computation">
467 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
468 <option value="default">Use default settings</option>
469 <option value="advanced">Set Computation Options</option>
470 </param>
471 <when value="default"/>
472 <when value="advanced">
473 <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when left empty)"
474 help="Defaults to the ultrafast level of ((readlength+2)/12 - 2).
475 If specified between 0.0 and 1.0, then treated as a fraction
476 of each read length. Otherwise, treated as an integral number
477 of mismatches (including indel and splicing penalties)
478 For RNA-Seq, you may need to increase this value slightly
479 to align reads extending past the ends of an exon.">
480 <validator type="in_range" message="The number of mismatches must be &gt;= 0." min="0."/>
481 </param>
482 <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
483 <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
484 <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 2)"
485 help="(from one end of the read to the best possible position at the other end). For example, if this value is 2, then if GSNAP finds an exact or
486 1-mismatch alignment, it will not try to find a terminal alignment.
487 Note that this default value may not be low enough if you want to
488 obtain terminal alignments for very short reads, although such reads
489 probably don't have enough specificity for terminal alignments anyway." />
490 <param name="indel_penalty" type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
491 help="Counts against mismatches allowed. To find indels, make indel-penalty less than or equal to max-mismatches. A value &lt; 2 can lead to false positives at read ends" />
492 <param name="indel_endlength" type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
493 <param name="max_middle_insertions" type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" />
494 <param name="max_middle_deletions" type="integer" value="" optional="true" label="Maximum number of middle deletions allowed (default 30)" />
495 <param name="max_end_insertions" type="integer" value="" optional="true" label="Maximum number of end insertions allowed (default 3)" />
496 <param name="max_end_deletions" type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" />
497 <param name="suboptimal_levels" type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
498 help="All hits with best score plus suboptimal-levels are reported" />
499 <param name="adapter_strip" type="select" label="Method for removing adapters from reads"
500 help="paired removes adapters from paired-end reads if a concordant or paired alignment cannot be found from the original read">
501 <option value="paired" selected="true">paired</option>
502 <option value="off">off</option>
503 </param>
504 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
505 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
506 <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)"
507 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/>
508 <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results"
509 help="generated by gsnap_tally and iit_store"/>
510
511 <!--
512 tallydir=STRING Directory for tally IIT file to resolve concordant multiple results (default is
513 location of genome index files specified using -D and -d). Note: can
514 just give full path name to use-tally instead.
515 use-tally=STRING Use this tally IIT file to resolve concordant multiple results
516 runlengthdir=STRING Directory for runlength IIT file to resolve concordant multiple results (default is
517 location of genome index files specified using -D and -d). Note: can
518 just give full path name to use-runlength instead.
519 use-runlength=STRING Use this runlength IIT file to resolve concordant multiple results
520 -->
521
522 <!-- Options for GMAP alignment within GSNAP -->
523 <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels"
524 help="Default: pairsearch,terminal,improve">
525 <option value="pairsearch" selected="true">pairsearch</option>
526 <option value="terminal" selected="true">terminal</option>
527 <option value="improve" selected="true">improve</option>
528 </param>
529 <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)"
530 help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" />
531 <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 3)"
532 help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." />
533 <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)"
534 help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." />
535 <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)"
536 help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." />
537 <param name="microexon_spliceprob" type="float" value="" optional="true" label="GMAP microexons threshold (default .90)"
538 help="Allow microexons only if one of the splice site probabilities is greater than this value." >
539 <validator type="in_range" message="The microexons probability must be between 0. and 1." min="0." max="1."/>
540 </param>
541 </when>
542 </conditional>
543
544 <conditional name="splicing">
545 <param name="options" type="select" label="&lt;HR&gt;Splicing options for RNA-Seq" help="">
546 <option value="default">Use default settings</option>
547 <option value="advanced">Set Splicing Options</option>
548 </param>
549 <when value="default"/>
550 <when value="advanced">
551 <!-- Splicing options for RNA-Seq -->
552 <!-- use-splicing This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splicing -->
553 <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
554 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
555 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
556 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/>
557 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed"
558 help="A distant splice is one where the intron length exceeds the value of localsplicedist or is an
559 inversion, scramble, or translocation between two different chromosomes. Counts against mismatches allowed"/>
560 <param name="distant_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for distant spliced alignments"
561 help="(default 16, min is the kmer length)"/>
562 <param name="shortend_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments"
563 help="(default 2, but unless known splice sites are provided, GSNAP may still need the end length to be the value of kmer size to find a given splice)"/>
564 <param name="distant_splice_identity" type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/>
565 <param name="antistranded_penalty" type="integer" value="" optional="true" label="Penalty for antistranded splicing when using stranded RNA-Seq protocols"
566 help="A positive value, such as 1, expects antisense on the first read and sense on the second read.
567 Default is 0, which treats sense and antisense equally well"/>
568 </when>
569 </conditional>
570
571 <!-- Output data -->
572 <conditional name="output">
573 <param name="options" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Output options for RNA-Seq" help="">
574 <option value="default">Use default settings</option>
575 <option value="advanced">Set Output Options</option>
576 </param>
577 <when value="default"/>
578 <when value="advanced">
579 <param name="npath" type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/>
580 <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive"
581 help="If more than maximum number of paths are found, then nothing is printed."/>
582 <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment"
583 help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/>
584 <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap"
585 help="For paired-end reads whose alignments overlap, clip the overlapping region."/>
586 </when>
587 </conditional>
588 <conditional name="result">
589 <param name="format" type="select" label="Select the output format" help="">
590 <option value="sam">SAM</option>
591 <!-- goby should only be an option if the input is in goby format
592 <option value="goby">Goby</option>
593 -->
594 <option value="gsnap">GSNAP default output</option>
595 </param>
596 <when value="gsnap">
597 </when>
598 <when value="sam">
599 <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
600 <param name="read_group_id" type="text" value="" optional="true" label="Value to put into read-group id (RG-ID) field"/>
601 <param name="read_group_name" type="text" value="" optional="true" label="Value to put into read-group name (RG-SM) field"/>
602 <param name="read_group_library" type="text" value="" optional="true" label="Value to put into read-group library (RG-LB) field"/>
603 <param name="read_group_platform" type="text" value="" optional="true" label="Value to put into read-group library platform (RG-PL) field"/>
604 <param name="quality_shift" type="integer" value="" optional="true" label="Shift FASTQ quality scores by this amount in SAM output (default -31)"/>
605 </when>
606 <!--
607 <when value="goby">
608 <param name="goby_output" type="text" value="" label="Basename for Goby output files"/>
609 <param name="creads_window_start" type="integer" value="" optional="true" label="Compact reads window start (default: 0=start of file)"/>
610 <param name="creads_window_end" type="integer" value="" optional="true" label="Compact reads window end (default: 0=end of file)"/>
611 <param name="creads_complement" type="boolean" truevalue="-\-creads-complement" falsevalue="" checked="false" label="Complement read sequences (without reversing)"/>
612 </when>
613 -->
614 </conditional>
615 <!-- TODO combine fails and split_output -->
616
617 <conditional name="results">
618 <param name="split_output" type="select" label="&lt;HR&gt;Split outputs"
619 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results">
620 <option value="no">no</option>
621 <option value="yes">yes</option>
622 </param>
623 <when value="no">
624 <conditional name="fails">
625 <param name="choice" type="select" label="How to deal with fails" help="">
626 <option value="default">default - include them in results</option>
627 <option value="nofails">nofails - exclude fails from results</option>
628 <option value="failsonly">failsonly - only output failing results</option>
629 </param>
630 <when value="default"/>
631 <when value="nofails"/>
632 <when value="failsonly">
633 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
634 help=""/>
635 </when>
636 </conditional>
637 </when>
638 <when value="yes">
639 <conditional name="fails">
640 <param name="choice" type="select" label="How to deal with fails" help="">
641 <option value="default">default - include them in results</option>
642 <option value="nofails">nofails - exclude fails from results</option>
643 <option value="failsonly">failsonly - only output failing results</option>
644 </param>
645 <when value="default"/>
646 <when value="nofails"/>
647 <when value="failsonly"/>
648 </conditional>
649 <param name="fails_as_input" type="boolean" truevalue="--fails-as-input" falsevalue="" checked="false" label="Print completely failed alignments as input FASTA or FASTQ format"
650 help=""/>
651 </when>
652 </conditional>
653
654 </inputs>
655 <outputs>
656 <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: gsnap.log"/>
657
658 <data format="txt" name="gsnap_out" label="${tool.name} on ${on_string} ${result.format}" >
659 <filter>(results['split_output'] == 'no' and (results['fails']['choice'] != 'failsonly' or results['fails']['fails_as_input'] == False))</filter>
660 <change_format>
661 <when input="result['format']" value="sam" format="sam"/>
662 <when input="result['format']" value="gsnap" format="gsnap"/>
663 </change_format>
664 </data>
665
666 <data format="fastq" name="gsnap_fq" label="${tool.name} on ${on_string} fails.fq" >
667 <filter>(results['split_output'] == 'no' and results['fails']['choice'] == 'failsonly' and results['fails']['fails_as_input'] == True)</filter>
668 </data>
669
670 <!-- nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, concordant_mult -->
671
672 <data format="txt" name="unpaired_mult" label="${tool.name} on ${on_string} unpaired_mult.${result.format}" from_work_dir="gsnap_out.unpaired_mult">
673 <filter>(results['split_output'] == 'yes')</filter>
674 <change_format>
675 <when input="result['format']" value="sam" format="sam"/>
676 <when input="result['format']" value="gsnap" format="gsnap"/>
677 </change_format>
678 </data>
679 <data format="txt" name="unpaired_uniq" label="${tool.name} on ${on_string} unpaired_uniq.${result.format}" from_work_dir="gsnap_out.unpaired_uniq">
680 <filter>(results['split_output'] == 'yes')</filter>
681 <change_format>
682 <when input="result['format']" value="sam" format="sam"/>
683 <when input="result['format']" value="gsnap" format="gsnap"/>
684 </change_format>
685 </data>
686 <data format="txt" name="unpaired_transloc" label="${tool.name} on ${on_string} unpaired_transloc.${result.format}" from_work_dir="gsnap_out.unpaired_transloc">
687 <filter>(results['split_output'] == 'yes')</filter>
688 <change_format>
689 <when input="result['format']" value="sam" format="sam"/>
690 <when input="result['format']" value="gsnap" format="gsnap"/>
691 </change_format>
692 </data>
693 <data format="txt" name="halfmapping_mult" label="${tool.name} on ${on_string} halfmapping_mult.${result.format}" from_work_dir="gsnap_out.halfmapping_mult">
694 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
695 <change_format>
696 <when input="result['format']" value="sam" format="sam"/>
697 <when input="result['format']" value="gsnap" format="gsnap"/>
698 </change_format>
699 </data>
700 <data format="txt" name="halfmapping_uniq" label="${tool.name} on ${on_string} halfmapping_uniq.${result.format}" from_work_dir="gsnap_out.halfmapping_uniq">
701 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
702 <change_format>
703 <when input="result['format']" value="sam" format="sam"/>
704 <when input="result['format']" value="gsnap" format="gsnap"/>
705 </change_format>
706 </data>
707 <data format="txt" name="halfmapping_transloc" label="${tool.name} on ${on_string} halfmapping_transloc.${result.format}" from_work_dir="gsnap_out.halfmapping_transloc">
708 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
709 <change_format>
710 <when input="result['format']" value="sam" format="sam"/>
711 <when input="result['format']" value="gsnap" format="gsnap"/>
712 </change_format>
713 </data>
714 <data format="txt" name="paired_mult" label="${tool.name} on ${on_string} paired_mult.${result.format}" from_work_dir="gsnap_out.paired_mult">
715 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
716 <change_format>
717 <when input="result['format']" value="sam" format="sam"/>
718 <when input="result['format']" value="gsnap" format="gsnap"/>
719 </change_format>
720 </data>
721 <data format="txt" name="paired_uniq" label="${tool.name} on ${on_string} paired_uniq.${result.format}" from_work_dir="gsnap_out.paired_uniq">
722 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
723 <change_format>
724 <when input="result['format']" value="sam" format="sam"/>
725 <when input="result['format']" value="gsnap" format="gsnap"/>
726 </change_format>
727 </data>
728 <data format="txt" name="paired_transloc" label="${tool.name} on ${on_string} paired_transloc.${result.format}" from_work_dir="gsnap_out.paired_transloc">
729 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
730 <change_format>
731 <when input="result['format']" value="sam" format="sam"/>
732 <when input="result['format']" value="gsnap" format="gsnap"/>
733 </change_format>
734 </data>
735
736 <data format="txt" name="concordant_mult" label="${tool.name} on ${on_string} concordant_mult.${result.format}" from_work_dir="gsnap_out.concordant_mult">
737 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
738 <change_format>
739 <when input="result['format']" value="sam" format="sam"/>
740 <when input="result['format']" value="gsnap" format="gsnap"/>
741 </change_format>
742 </data>
743 <data format="txt" name="concordant_uniq" label="${tool.name} on ${on_string} concordant_uniq.${result.format}" from_work_dir="gsnap_out.concordant_uniq">
744 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
745 <change_format>
746 <when input="result['format']" value="sam" format="sam"/>
747 <when input="result['format']" value="gsnap" format="gsnap"/>
748 </change_format>
749 </data>
750 <data format="txt" name="concordant_transloc" label="${tool.name} on ${on_string} concordant_transloc.${result.format}" from_work_dir="gsnap_out.concordant_transloc">
751 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
752 <change_format>
753 <when input="result['format']" value="sam" format="sam"/>
754 <when input="result['format']" value="gsnap" format="gsnap"/>
755 </change_format>
756 </data>
757
758 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gsnap_out.nomapping">
759 <filter>(results['split_output'] == 'yes' and results['fails_as_input'] == False)</filter>
760 <change_format>
761 <when input="result['format']" value="sam" format="sam"/>
762 <when input="result['format']" value="gsnap" format="gsnap"/>
763 </change_format>
764 </data>
765
766 <data format="fastq" name="nomapping_fq" label="${tool.name} on ${on_string} nomapping.fq" from_work_dir="gsnap_out.nomapping.fq">
767 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == False)</filter>
768 </data>
769
770 <data format="fastq" name="nomapping_1_fq" label="${tool.name} on ${on_string} nomapping.1.fq" from_work_dir="gsnap_out.nomapping.1.fq">
771 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
772 </data>
773
774 <data format="fastq" name="nomapping_2_fq" label="${tool.name} on ${on_string} nomapping.2.fq" from_work_dir="gsnap_out.nomapping.2.fq">
775 <filter>(results['split_output'] == 'yes' and seq['format'] == 'fastq' and seq['paired']['ispaired'] == True)</filter>
776 </data>
777
778 <!-- Will probably need wrapper code to generate composite datatype for goby alignment
779 <data format="gobyalignment" name="goby_alignment" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gsnap_out.nomapping">
780 <filter>result['format'] == 'goby'</filter>
781 </data>
782 -->
783
784 </outputs>
785 <tests>
786 </tests>
787
788 <help>
789
790 **What it does**
791
792 GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.
793 Publication_ citation: Thomas D. Wu, Serban Nacu "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
794
795 .. _GSNAP: http://research-pub.gene.com/gmap/
796 .. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
797 http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
798
799 ------
800
801 **Know what you are doing**
802
803 .. class:: warningmark
804
805 You will want to read the README_
806
807 .. _README: http://research-pub.gene.com/gmap/src/README
808
809 ------
810
811 **Input formats**
812
813 Input to GSNAP should be either in FASTQ or FASTA format.
814
815 The FASTQ input may include quality scores, which will then be included in SAM
816 output, if that output format is selected.
817
818 For FASTA format, you should include one line per read (or end of a
819 paired-end read). The same FASTA file can have a mixture of
820 single-end and paired-end reads of varying lengths, if desired.
821
822 Single-end reads:
823
824 Each FASTA entry should contain one short read per line, like this
825
826 >Header information
827 AAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTA
828
829 Each short read can have a different length. However, the entire read
830 needs to be on a single line, and may not wrap around multiple lines.
831 If it extends to a second line, GSNAP will think that the read is
832 paired-end.
833
834
835 Paired-end reads:
836
837 Each FASTA entry should contain two short reads, one per line, like
838 this
839
840 >Header information
841 AAAACATTCTCCTCCGCATAAGCCTAGTAGATTA
842 GGCGTAGGTAGAAGTAGAGGTTAAGGCGCGTCAG
843
844 By default, the program assumes that the second end is in the reverse
845 complement direction compared with the first end. If they are in the
846 same direction, you may need to use the --circular-input (or -c) flag.
847
848 ( The Galaxy tool: "FASTA Width formatter" can be used to reformat fasta files to have single line sequences. )
849
850 ------
851
852 **Output formats in GSNAP**
853
854 SAM output format
855
856 Default GSNAP format
857 See the README_
858
859
860
861
862 </help>
863 </tool>
864