comparison tools/ngs_rna/tophat_color_wrapper.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="tophat_color" name="Tophat for SOLiD" version="1.0.0">
2 <description>Find splice junctions using RNA-seq data</description>
3 <requirements>
4 <requirement type="package">tophat</requirement>
5 </requirements>
6 <command interpreter="python">
7 tophat_wrapper.py
8 ## Use the slot count exported by the job runner (GALAXY_SLOTS) when it is set; otherwise fall back to 4 threads.
9 --num-threads="\${GALAXY_SLOTS:-4}"
10
11 ## base- or color-space
12 --color-space
13
14 ## Provide outputs.
15 --junctions-output=$junctions
16 --hits-output=$accepted_hits
17
18 ## Handle reference file.
19 #if $refGenomeSource.genomeSource == "history":
20 --own-file=$refGenomeSource.ownFile
21 #else:
22 --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ 'tophat_indexes_color' ].get_fields() )[0][-1] }"
23 #end if
24
25 ## Are reads single-end or paired?
26 --single-paired=$singlePaired.sPaired
27
28 ## First input file always required.
29 --input1=$input1
30
31 ## Set params based on whether reads are single-end or paired.
32 #if $singlePaired.sPaired == "single":
33 --settings=$singlePaired.sParams.sSettingsType
34 #if $singlePaired.sParams.sSettingsType == "full":
35 -a $singlePaired.sParams.anchor_length
36 -m $singlePaired.sParams.splice_mismatches
37 -i $singlePaired.sParams.min_intron_length
38 -I $singlePaired.sParams.max_intron_length
39 -F $singlePaired.sParams.junction_filter
40 -g $singlePaired.sParams.max_multihits
41 --min-segment-intron $singlePaired.sParams.min_segment_intron
42 --max-segment-intron $singlePaired.sParams.max_segment_intron
43 --seg-mismatches=$singlePaired.sParams.seg_mismatches
44 --seg-length=$singlePaired.sParams.seg_length
45 --library-type=$singlePaired.sParams.library_type
46
47 ## Indel search.
48 #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
49 --allow-indels
50 --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
51 --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
52 #end if
53
54 ## Supplying junctions parameters.
55 #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
56 #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
57 -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
58 #end if
59 #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
60 -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
61 #end if
62 ## no_novel_juncs is a plain select, not a conditional's test param; compare its string form (presumably the raw value is a wrapper object; confirm).
63 #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
64 --no-novel-juncs
65 #end if
66 #end if
67
68 #if $singlePaired.sParams.closure_search.use_search == "Yes":
69 --closure-search
70 --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
71 --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
72 --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
73 #else:
74 --no-closure-search
75 #end if
76 #if $singlePaired.sParams.coverage_search.use_search == "Yes":
77 --coverage-search
78 --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
79 --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron
80 #else:
81 --no-coverage-search
82 #end if
83 ## microexon_search is a plain select, not a conditional's test param; compare its string form (presumably the raw value is a wrapper object; confirm).
84 #if str($singlePaired.sParams.microexon_search) == "Yes":
85 --microexon-search
86 #end if
87 #end if
88 #else:
89 --input2=$singlePaired.input2
90 -r $singlePaired.mate_inner_distance
91 --settings=$singlePaired.pParams.pSettingsType
92 #if $singlePaired.pParams.pSettingsType == "full":
93 --mate-std-dev=$singlePaired.pParams.mate_std_dev
94 -a $singlePaired.pParams.anchor_length
95 -m $singlePaired.pParams.splice_mismatches
96 -i $singlePaired.pParams.min_intron_length
97 -I $singlePaired.pParams.max_intron_length
98 -F $singlePaired.pParams.junction_filter
99 -g $singlePaired.pParams.max_multihits
100 --min-segment-intron $singlePaired.pParams.min_segment_intron
101 --max-segment-intron $singlePaired.pParams.max_segment_intron
102 --seg-mismatches=$singlePaired.pParams.seg_mismatches
103 --seg-length=$singlePaired.pParams.seg_length
104 --library-type=$singlePaired.pParams.library_type
105
106 ## Indel search.
107 #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
108 --allow-indels
109 --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
110 --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
111 #end if
112
113 ## Supplying junctions parameters.
114 #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
115 #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
116 -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
117 #end if
118 #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
119 -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
120 #end if
121 ## no_novel_juncs is a plain select, not a conditional's test param; compare its string form (presumably the raw value is a wrapper object; confirm).
122 #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
123 --no-novel-juncs
124 #end if
125 #end if
126
127 #if $singlePaired.pParams.closure_search.use_search == "Yes":
128 --closure-search
129 --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
130 --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
131 --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
132 #else:
133 --no-closure-search
134 #end if
135 #if $singlePaired.pParams.coverage_search.use_search == "Yes":
136 --coverage-search
137 --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
138 --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron
139 #else:
140 --no-coverage-search
141 #end if
142 ## microexon_search is a plain select, not a conditional's test param; compare its string form (presumably the raw value is a wrapper object; confirm).
143 #if str($singlePaired.pParams.microexon_search) == "Yes":
144 --microexon-search
145 #end if
146 #end if
147 #end if
148 </command>
149 <inputs>
150 <param format="fastqcssanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
151 <conditional name="refGenomeSource">
152 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
153 <option value="indexed">Use a built-in index</option>
154 <option value="history">Use one from the history</option>
155 </param>
156 <when value="indexed">
157 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
158 <options from_data_table="tophat_indexes_color">
159 <filter type="sort_by" column="2"/>
160 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
161 </options>
162 </param>
163 </when>
164 <when value="history">
165 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
166 </when> <!-- history -->
167 </conditional> <!-- refGenomeSource -->
168 <conditional name="singlePaired">
169 <param name="sPaired" type="select" label="Is this library mate-paired?">
170 <option value="single">Single-end</option>
171 <option value="paired">Paired-end</option>
172 </param>
173 <when value="single">
174 <conditional name="sParams">
175 <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters.">
176 <option value="preSet">Use Defaults</option>
177 <option value="full">Full parameter list</option>
178 </param>
179 <when value="preSet" />
180 <!-- Full/advanced params. -->
181 <when value="full">
182 <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
183 <option value="fr-unstranded">FR Unstranded</option>
184 <option value="fr-firststrand">FR First Strand</option>
185 <option value="fr-secondstrand">FR Second Strand</option>
186 </param>
187 <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> <!-- min enforces the lower bound stated in the label -->
188 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
189 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
190 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
191 <conditional name="indel_search">
192 <param name="allow_indel_search" type="select" label="Allow indel search">
193 <option value="No">No</option>
194 <option value="Yes">Yes</option>
195 </param>
196 <when value="No"/>
197 <when value="Yes">
198 <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
199 <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
200 </when>
201 </conditional>
202 <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> <!-- min/max enforce the range stated in the help text -->
203 <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
204 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
205 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
206 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
207 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
208
209 <!-- Options for supplying own junctions. -->
210 <conditional name="own_junctions">
211 <param name="use_junctions" type="select" label="Use Own Junctions">
212 <option value="No">No</option>
213 <option value="Yes">Yes</option>
214 </param>
215 <when value="Yes">
216 <conditional name="gene_model_ann">
217 <param name="use_annotations" type="select" label="Use Gene Annotation Model">
218 <option value="No">No</option>
219 <option value="Yes">Yes</option>
220 </param>
221 <when value="No" />
222 <when value="Yes">
223 <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
224 </when>
225 </conditional>
226 <conditional name="raw_juncs">
227 <param name="use_juncs" type="select" label="Use Raw Junctions">
228 <option value="No">No</option>
229 <option value="Yes">Yes</option>
230 </param>
231 <when value="No" />
232 <when value="Yes">
233 <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
234 </when>
235 </conditional>
236 <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
237 <option value="No">No</option>
238 <option value="Yes">Yes</option>
239 </param>
240 </when>
241 <when value="No" />
242 </conditional> <!-- /own_junctions -->
243
244 <!-- Closure search. -->
245 <conditional name="closure_search">
246 <param name="use_search" type="select" label="Use Closure Search">
247 <option value="No">No</option>
248 <option value="Yes">Yes</option>
249 </param>
250 <when value="Yes">
251 <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
252 <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
253 <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
254 </when>
255 <when value="No" />
256 </conditional>
257 <!-- Coverage search. -->
258 <conditional name="coverage_search">
259 <param name="use_search" type="select" label="Use Coverage Search">
260 <option selected="true" value="Yes">Yes</option>
261 <option value="No">No</option>
262 </param>
263 <when value="Yes">
264 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
265 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
266 </when>
267 <when value="No" />
268 </conditional>
269 <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
270 <option value="No">No</option>
271 <option value="Yes">Yes</option>
272 </param>
273 </when> <!-- full -->
274 </conditional> <!-- sParams -->
275 </when> <!-- single -->
276 <when value="paired">
277 <param format="fastqcssanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
278 <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" />
279 <conditional name="pParams">
280 <param name="pSettingsType" type="select" label="TopHat settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
281 <option value="preSet">Commonly used</option>
282 <option value="full">Full parameter list</option>
283 </param>
284 <when value="preSet" />
285 <!-- Full/advanced params. -->
286 <when value="full">
287 <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
288 <option value="fr-unstranded">FR Unstranded</option>
289 <option value="fr-firststrand">FR First Strand</option>
290 <option value="fr-secondstrand">FR Second Strand</option>
291 </param>
292 <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/>
293 <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> <!-- min enforces the lower bound stated in the label -->
294 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
295 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
296 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
297 <conditional name="indel_search">
298 <param name="allow_indel_search" type="select" label="Allow indel search">
299 <option value="No">No</option>
300 <option value="Yes">Yes</option>
301 </param>
302 <when value="No"/>
303 <when value="Yes">
304 <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
305 <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
306 </when>
307 </conditional>
308 <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" /> <!-- min/max enforce the range stated in the help text -->
309 <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
310 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
311 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
312 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
313 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
314 <!-- Options for supplying own junctions. -->
315 <conditional name="own_junctions">
316 <param name="use_junctions" type="select" label="Use Own Junctions">
317 <option value="No">No</option>
318 <option value="Yes">Yes</option>
319 </param>
320 <when value="Yes">
321 <conditional name="gene_model_ann">
322 <param name="use_annotations" type="select" label="Use Gene Annotation Model">
323 <option value="No">No</option>
324 <option value="Yes">Yes</option>
325 </param>
326 <when value="No" />
327 <when value="Yes">
328 <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
329 </when>
330 </conditional>
331 <conditional name="raw_juncs">
332 <param name="use_juncs" type="select" label="Use Raw Junctions">
333 <option value="No">No</option>
334 <option value="Yes">Yes</option>
335 </param>
336 <when value="No" />
337 <when value="Yes">
338 <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
339 </when>
340 </conditional>
341 <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
342 <option value="No">No</option>
343 <option value="Yes">Yes</option>
344 </param>
345 </when>
346 <when value="No" />
347 </conditional> <!-- /own_junctions -->
348
349 <!-- Closure search. -->
350 <conditional name="closure_search">
351 <param name="use_search" type="select" label="Use Closure Search">
352 <option value="No">No</option>
353 <option value="Yes">Yes</option>
354 </param>
355 <when value="Yes">
356 <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
357 <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
358 <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
359 </when>
360 <when value="No" />
361 </conditional>
362 <!-- Coverage search. -->
363 <conditional name="coverage_search">
364 <param name="use_search" type="select" label="Use Coverage Search">
365 <option selected="true" value="Yes">Yes</option>
366 <option value="No">No</option>
367 </param>
368 <when value="Yes">
369 <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
370 <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
371 </when>
372 <when value="No" />
373 </conditional>
374 <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
375 <option value="No">No</option>
376 <option value="Yes">Yes</option>
377 </param>
378 </when> <!-- full -->
379 </conditional> <!-- pParams -->
380 </when> <!-- paired -->
381 </conditional>
382 </inputs>
383
384 <outputs>
385 <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed"> <!-- BED output taken from tophat_out/insertions.bed; the filter below keeps it only when allow_indel_search is Yes in the single-end or paired-end full parameters. -->
386 <filter>
387 (
388 ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and
389 ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or
390 ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and
391 ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
392 )
393 </filter>
394 <actions> <!-- Sets the dbkey metadata: from column 1 of the tophat_indexes_color row matching refGenomeSource.index, or from the dbkey of the history reference file. -->
395 <conditional name="refGenomeSource.genomeSource">
396 <when value="indexed">
397 <action type="metadata" name="dbkey">
398 <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
399 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- discard rows whose column 0 starts with # -->
400 <filter type="param_value" ref="refGenomeSource.index" column="0"/>
401 </option>
402 </action>
403 </when>
404 <when value="history">
405 <action type="metadata" name="dbkey">
406 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
407 </action>
408 </when>
409 </conditional>
410 </actions>
411 </data>
412 <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed"> <!-- BED output taken from tophat_out/deletions.bed; the filter below keeps it only when allow_indel_search is Yes in the single-end or paired-end full parameters. -->
413 <filter>
414 (
415 ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and
416 ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or
417 ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and
418 ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
419 )
420 </filter>
421 <actions> <!-- Sets the dbkey metadata: from column 1 of the tophat_indexes_color row matching refGenomeSource.index, or from the dbkey of the history reference file. -->
422 <conditional name="refGenomeSource.genomeSource">
423 <when value="indexed">
424 <action type="metadata" name="dbkey">
425 <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
426 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- discard rows whose column 0 starts with # -->
427 <filter type="param_value" ref="refGenomeSource.index" column="0"/>
428 </option>
429 </action>
430 </when>
431 <when value="history">
432 <action type="metadata" name="dbkey">
433 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
434 </action>
435 </when>
436 </conditional>
437 </actions>
438 </data>
439 <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions"> <!-- BED output with no filter; its path is passed to the wrapper through the junctions-output option of the command. -->
440 <actions> <!-- Sets the dbkey metadata: from column 1 of the tophat_indexes_color row matching refGenomeSource.index, or from the dbkey of the history reference file. -->
441 <conditional name="refGenomeSource.genomeSource">
442 <when value="indexed">
443 <action type="metadata" name="dbkey">
444 <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
445 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- discard rows whose column 0 starts with # -->
446 <filter type="param_value" ref="refGenomeSource.index" column="0"/>
447 </option>
448 </action>
449 </when>
450 <when value="history">
451 <action type="metadata" name="dbkey">
452 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
453 </action>
454 </when>
455 </conditional>
456 </actions>
457 </data>
458 <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits"> <!-- BAM output with no filter; its path is passed to the wrapper through the hits-output option of the command. -->
459 <actions> <!-- Sets the dbkey metadata: from column 1 of the tophat_indexes_color row matching refGenomeSource.index, or from the dbkey of the history reference file. -->
460 <conditional name="refGenomeSource.genomeSource">
461 <when value="indexed">
462 <action type="metadata" name="dbkey">
463 <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
464 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- discard rows whose column 0 starts with # -->
465 <filter type="param_value" ref="refGenomeSource.index" column="0"/>
466 </option>
467 </action>
468 </when>
469 <when value="history">
470 <action type="metadata" name="dbkey">
471 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
472 </action>
473 </when>
474 </conditional>
475 </actions>
476 </data>
477 </outputs>
478 <tests>
479 <!-- Test color-space single-end reads with user-supplied reference fasta and preset parameters -->
480 <test>
481 <!-- TopHat commands used to produce the expected outputs:
482 cp test-data/tophat_in5.fasta tophat_in5.fa
483 bowtie-build -C -f tophat_in5.fa tophat_in5
484 tophat -p 1 -C tophat_in5 test-data/tophat_in4.fastqcssanger
485 Rename the files in the TopHat output directory appropriately (no -o is given above, so presumably the default tophat_out; confirm)
486 -->
487 <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger" />
488 <param name="genomeSource" value="history" />
489 <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
490 <param name="sPaired" value="single" />
491 <param name="sSettingsType" value="preSet" />
492 <output name="junctions" file="tophat_out5j.bed" />
493 <output name="accepted_hits" file="tophat_out5h.bam" compare="sim_size" />
494 </test>
495 <!-- Test color-space single-end reads with pre-built index and full parameters -->
496 <test>
497 <!-- TopHat commands used to produce the expected outputs:
498 tophat -p 1 -C -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search partialMm9chrX_random_cs test-data/tophat_in4.fastqcssanger
499 Replace the + with double-dash
500 Rename the files in the TopHat output directory appropriately (no -o is given above, so presumably the default tophat_out; confirm)
501 -->
502 <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger"/>
503 <param name="genomeSource" value="indexed"/>
504 <param name="index" value="partialMm9chrX_random_cs" />
505 <param name="sPaired" value="single"/>
506 <param name="sSettingsType" value="full"/>
507 <param name="library_type" value="FR Unstranded"/> <!-- this is the option's display text; its value attribute is fr-unstranded -->
508 <param name="anchor_length" value="8"/>
509 <param name="splice_mismatches" value="0"/>
510 <param name="min_intron_length" value="70"/>
511 <param name="max_intron_length" value="500000"/>
512 <param name="junction_filter" value="0.15"/>
513 <param name="max_multihits" value="40"/>
514 <param name="min_segment_intron" value="50" />
515 <param name="max_segment_intron" value="500000" />
516 <param name="seg_mismatches" value="2"/>
517 <param name="seg_length" value="25"/>
518 <param name="allow_indel_search" value="Yes"/>
519 <param name="max_insertion_length" value="3"/>
520 <param name="max_deletion_length" value="3"/>
521 <param name="use_junctions" value="Yes" />
522 <param name="use_annotations" value="No" />
523 <param name="use_juncs" value="No" />
524 <param name="no_novel_juncs" value="No" />
525 <param name="use_search" value="Yes" /> <!-- presumably closure_search's use_search (coverage_search has a param of the same name); confirm how the test harness resolves the duplicate -->
526 <param name="min_closure_exon" value="50" />
527 <param name="min_closure_intron" value="50" />
528 <param name="max_closure_intron" value="5000" />
529 <param name="use_search" value="Yes" /> <!-- presumably coverage_search's use_search; see the note on the earlier use_search entry -->
530 <param name="min_coverage_intron" value="50" />
531 <param name="max_coverage_intron" value="20000" />
532 <param name="microexon_search" value="Yes" />
533 <output name="insertions" file="tophat_out6i.bed" />
534 <output name="deletions" file="tophat_out6d.bed" />
535 <output name="junctions" file="tophat_out6j.bed" />
536 <output name="accepted_hits" file="tophat_out6h.bam" compare="sim_size" />
537 </test>
538 <!-- Paired-end color-space reads, built-in index, preset (default) TopHat settings -->
539 <test>
540 <!-- Expected outputs were generated with:
541 tophat -C -o tmp_dir -r 50 -p 1 partialMm9chrX_random_cs test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
542 then renaming the files in tmp_dir to the names listed in the output elements
543 -->
544 <param name="input1" value="tophat_in6.fastqcssanger" ftype="fastqcssanger"/>
545 <param name="genomeSource" value="indexed"/>
546 <param name="index" value="partialMm9chrX_random_cs"/>
547 <param name="sPaired" value="paired"/>
548 <param name="input2" value="tophat_in7.fastqcssanger" ftype="fastqcssanger"/>
549 <param name="mate_inner_distance" value="50"/>
550 <param name="pSettingsType" value="preSet"/>
551 <output file="tophat_out7j.bed" name="junctions"/>
552 <output file="tophat_out7h.bam" name="accepted_hits" compare="sim_size"/>
553 </test>
554 <!-- Test color-space paired-end reads with user-supplied reference fasta and full parameters -->
555 <test>
556 <!-- TopHat commands used to produce the expected outputs:
557 cp test-data/tophat_in5.fasta tophat_in5.fa
558 bowtie-build -C -f tophat_in5.fa tophat_in5
559 tophat -C -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +library-type fr-unstranded +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 15000 +mate-std-dev 20 +segment-mismatches 2 +segment-length 20 +min-segment-intron 50 +max-segment-intron 500000 tophat_in5 test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
560 Replace the + with double-dash
561 Rename the files in tmp_dir appropriately. NOTE(review): the command above uses +max-coverage-intron 15000, +min-segment-intron 50 and +max-segment-intron 500000, while the params below set 20000, 70 and 400000; confirm which values produced the expected files.
562 -->
563 <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger"/>
564 <param name="genomeSource" value="history"/>
565 <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
566 <param name="sPaired" value="paired"/>
567 <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
568 <param name="mate_inner_distance" value="20"/>
569 <param name="pSettingsType" value="full"/>
570 <param name="library_type" value="FR Unstranded"/> <!-- this is the option's display text; its value attribute is fr-unstranded -->
571 <param name="mate_std_dev" value="20"/>
572 <param name="anchor_length" value="8"/>
573 <param name="splice_mismatches" value="0"/>
574 <param name="min_intron_length" value="70"/>
575 <param name="max_intron_length" value="500000"/>
576 <param name="junction_filter" value="0.15"/>
577 <param name="max_multihits" value="40"/>
578 <param name="min_segment_intron" value="70" />
579 <param name="max_segment_intron" value="400000" />
580 <param name="seg_mismatches" value="2"/>
581 <param name="seg_length" value="20"/>
582 <param name="allow_indel_search" value="Yes"/>
583 <param name="max_insertion_length" value="3"/>
584 <param name="max_deletion_length" value="3"/>
585 <param name="use_junctions" value="No" />
586 <param name="use_search" value="No" /> <!-- presumably closure_search's use_search (coverage_search has a param of the same name); confirm how the test harness resolves the duplicate -->
587 <param name="use_search" value="Yes" /> <!-- presumably coverage_search's use_search; see the note on the previous entry -->
588 <param name="min_coverage_intron" value="50" />
589 <param name="max_coverage_intron" value="20000" />
590 <param name="microexon_search" value="No" />
591 <output name="insertions" file="tophat_out8i.bed" />
592 <output name="deletions" file="tophat_out8d.bed" />
593 <output name="junctions" file="tophat_out8j.bed" />
594 <output name="accepted_hits" file="tophat_out8h.bam" compare="sim_size" />
595 </test>
596 </tests>
597
598 <help>
599 **Tophat Overview**
600
601 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009).
602
603 .. _Tophat: http://tophat.cbcb.umd.edu/
604
605 ------
606
607 **Know what you are doing**
608
609 .. class:: warningmark
610
611 There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
612
613 .. __: http://tophat.cbcb.umd.edu/manual.html
614
615 ------
616
617 **Input formats**
618
619 This tool accepts color-space (SOLiD) reads in Sanger FASTQ format (fastqcssanger). Use the FASTQ Groomer to prepare your files.
620
621 ------
622
623 **Outputs**
624
625 Tophat produces two main output files:
626
627 - junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
628 - accepted_hits -- A list of read alignments in BAM_ format.
629
630 .. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
631 .. _BAM: http://samtools.sourceforge.net/
632
633 Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
634
635 -------
636
637 **Tophat settings**
638
639 All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here.
640
641 ------
642
643 **Tophat parameter list**
644
645 This is a list of implemented Tophat options::
646
648
649 -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments
650 selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter
651 is required for paired end runs.
652 --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
653 -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced
654 alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one
655 read with this many bases on each side. This must be at least 3 and the default is 8.
656 -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
657 -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
658 -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
659 -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, A and B, is supported by S reads. Let the average depth of coverage of
660 exon A be D, and assume that it is higher than that of B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the
661 filter. The default is 0.15.
662 -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many
663 alignments. The default is 40.
664 -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
665 -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
666 --no-novel-juncs Only look for junctions indicated in the supplied GTF file. (ignored without -G)
667 --no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
668 --closure-search Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
669 --no-coverage-search Disables the coverage based search for junctions.
670 --coverage-search Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
671 --microexon-search With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
672 --butterfly-search TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.
673 --segment-mismatches Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
674 --segment-length Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
675 --min-closure-exon During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
676 --min-closure-intron The minimum intron length that may be found during closure search. The default is 50.
677 --max-closure-intron The maximum intron length that may be found during closure search. The default is 5000.
678 --min-coverage-intron The minimum intron length that may be found during coverage search. The default is 50.
679 --max-coverage-intron The maximum intron length that may be found during coverage search. The default is 20000.
680 --min-segment-intron The minimum intron length that may be found during split-segment search. The default is 50.
681 --max-segment-intron The maximum intron length that may be found during split-segment search. The default is 500000.
682 </help>
683 </tool>