comparison tophat_wrapper.xml @ 3:2ad64c5bb5f4 draft default tip

planemo upload commit 4ec9eb1570ea116d83f5464a786af6e14fb5b57d
author devteam
date Fri, 09 Oct 2015 15:48:13 -0400
parents 8afd8a379d5f
children
comparison
equal deleted inserted replaced
2:8afd8a379d5f 3:2ad64c5bb5f4
1 <tool id="tophat" name="Tophat for Illumina" version="1.5.0"> 1 <tool id="tophat" name="TopHat for Illumina" version="1.5.0">
2 <!-- Wrapper compatible with Tophat versions 1.3.0 to 1.4.1 --> 2 <!-- Wrapper compatible with TopHat versions 1.3.0 to 1.4.1 -->
3 <description>Find splice junctions using RNA-seq data</description> 3 <description>Find splice junctions using RNA-seq data</description>
4 <version_command>tophat --version</version_command> 4 <version_command>tophat --version</version_command>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="0.1.18">samtools</requirement> 6 <requirement type="package" version="0.1.18">samtools</requirement>
7 <requirement type="package" version="0.12.7">bowtie</requirement> 7 <requirement type="package" version="0.12.7">bowtie</requirement>
41 --max-segment-intron $singlePaired.sParams.max_segment_intron 41 --max-segment-intron $singlePaired.sParams.max_segment_intron
42 --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches 42 --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches
43 --seg-mismatches=$singlePaired.sParams.seg_mismatches 43 --seg-mismatches=$singlePaired.sParams.seg_mismatches
44 --seg-length=$singlePaired.sParams.seg_length 44 --seg-length=$singlePaired.sParams.seg_length
45 --library-type=$singlePaired.sParams.library_type 45 --library-type=$singlePaired.sParams.library_type
46 46
47 ## Indel search. 47 ## Indel search.
48 #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes": 48 #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
49 ## --allow-indels 49 ## --allow-indels
50 --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length 50 --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
51 --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length 51 --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
102 --max-segment-intron $singlePaired.pParams.max_segment_intron 102 --max-segment-intron $singlePaired.pParams.max_segment_intron
103 --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches 103 --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches
104 --seg-mismatches=$singlePaired.pParams.seg_mismatches 104 --seg-mismatches=$singlePaired.pParams.seg_mismatches
105 --seg-length=$singlePaired.pParams.seg_length 105 --seg-length=$singlePaired.pParams.seg_length
106 --library-type=$singlePaired.pParams.library_type 106 --library-type=$singlePaired.pParams.library_type
107 107
108 ## Indel search. 108 ## Indel search.
109 #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes": 109 #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
110 ## --allow-indels 110 ## --allow-indels
111 --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length 111 --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
112 --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length 112 --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
179 </param> 179 </param>
180 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." /> 180 <param name="anchor_length" type="integer" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
181 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" /> 181 <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
182 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." /> 182 <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
183 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." /> 183 <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
184 <conditional name="indel_search"> 184 <expand macro="indel_searchConditional" />
185 <param name="allow_indel_search" type="select" label="Allow indel search">
186 <option value="Yes">Yes</option>
187 <option value="No">No</option>
188 </param>
189 <when value="No"/>
190 <when value="Yes">
191 <param name="max_insertion_length" type="integer" value="3" label="Max insertion length." help="The maximum insertion length." />
192 <param name="max_deletion_length" type="integer" value="3" label="Max deletion length." help="The maximum deletion length." />
193 </when>
194 </conditional>
195 alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
196 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" /> 185 <param name="max_multihits" type="integer" value="20" label="Maximum number of alignments to be allowed" />
197 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" /> 186 <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
198 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" /> 187 <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
199 <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" /> 188 <param name="initial_read_mismatches" type="integer" min="0" value="2" label="Number of mismatches allowed in the initial read mapping" />
200 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" /> 189 <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
201 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" /> 190 <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
202 191
203 <!-- Options for supplying own junctions. --> 192 <!-- Options for supplying own junctions. -->
204 <conditional name="own_junctions"> 193 <conditional name="own_junctions">
205 <param name="use_junctions" type="select" label="Use Own Junctions"> 194 <param name="use_junctions" type="select" label="Use Own Junctions">
206 <option value="No">No</option> 195 <option value="No">No</option>
207 <option value="Yes">Yes</option> 196 <option value="Yes">Yes</option>
232 <option value="Yes">Yes</option> 221 <option value="Yes">Yes</option>
233 </param> 222 </param>
234 </when> 223 </when>
235 <when value="No" /> 224 <when value="No" />
236 </conditional> <!-- /own_junctions --> 225 </conditional> <!-- /own_junctions -->
237 226
238 <!-- Closure search. --> 227 <!-- Closure search. -->
239 <conditional name="closure_search"> 228 <conditional name="closure_search">
240 <param name="use_search" type="select" label="Use Closure Search"> 229 <param name="use_search" type="select" label="Use Closure Search">
241 <option value="No">No</option> 230 <option value="No">No</option>
242 <option value="Yes">Yes</option> 231 <option value="Yes">Yes</option>
400 <output name="junctions" file="tophat_out2j.bed" /> 389 <output name="junctions" file="tophat_out2j.bed" />
401 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" /> 390 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
402 </test> 391 </test>
403 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters --> 392 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
404 <test> 393 <test>
405 <!-- Tophat commands: 394 <!-- TopHat commands:
406 bowtie-build -f test-data/tophat_in1.fasta tophat_in1 395 bowtie-build -f test-data/tophat_in1.fasta tophat_in1
407 tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger 396 tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
408 Replace the + with double-dash 397 Replace the + with double-dash
409 Rename the files in tmp_dir appropriately 398 Rename the files in tmp_dir appropriately
410 --> 399 -->
479 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" /> 468 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
480 </test> 469 </test>
481 </tests> 470 </tests>
482 471
483 <help> 472 <help>
484 **Tophat Overview** 473 **TopHat Overview**
485 474
486 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009). 475 TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons.
487 476
488 .. _Tophat: http://ccb.jhu.edu/software/tophat/ 477 .. _TopHat: http://ccb.jhu.edu/software/tophat/
489 478
490 ------ 479 ------
491 480
492 **Know what you are doing** 481 **Know what you are doing**
493 482
494 .. class:: warningmark 483 .. class:: warningmark
499 488
500 ------ 489 ------
501 490
502 **Input formats** 491 **Input formats**
503 492
504 Tophat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. 493 TopHat accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
505 494
506 ------ 495 ------
507 496
508 **Outputs** 497 **Outputs**
509 498
510 Tophat produces two output files: 499 TopHat produces two output files:
511 500
512 - junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction. 501 - junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
513 - accepted_hits -- A list of read alignments in BAM_ format. 502 - accepted_hits -- A list of read alignments in BAM_ format.
514 503
515 .. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1 504 .. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
517 506
518 Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format. 507 Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
519 508
520 ------- 509 -------
521 510
522 **Tophat settings** 511 **TopHat settings**
523 512
524 All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here. 513 All of the options have a default value. You can change any of them. Some of the options in TopHat have been implemented here.
525 514
526 ------ 515 ------
527 516
528 **Tophat parameter list** 517 **TopHat parameter list**
529 518
530 This is a list of implemented Tophat options:: 519 This is a list of implemented TopHat options::
531 520
532 -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments 521 -r This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments
533 selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 522 selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter
534 is required for paired end runs. 523 is required for paired end runs.
535 --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp. 524 --mate-std-dev INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
536 -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced 525 -a/--min-anchor-length INT The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced
537 alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one 526 alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one
538 read with this many bases on each side. This must be at least 3 and the default is 8. 527 read with this many bases on each side. This must be at least 3 and the default is 8.
539 -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0. 528 -m/--splice-mismatches INT The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
540 -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70. 529 -i/--min-intron-length INT The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
541 -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000. 530 -I/--max-intron-length INT The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
542 -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many 531 -g/--max-multihits INT Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many
543 alignments. The default is 40. 532 alignments. The default is 40.
544 -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping. 533 -G/--GTF [GTF 2.2 file] Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
545 -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive. 534 -j/--raw-juncs [juncs file] Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
546 --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G) 535 --no-novel-juncs Only look for junctions indicated in the supplied GFF file. (ignored without -G)
547 --no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default. 536 --no-closure-search Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.