Mercurial > repos > jjohnson > arriba
comparison arriba.xml @ 6:7253b367c082 draft
"planemo upload for repository https://github.com/jj-umn/tools-iuc/tree/arriba/tools/arriba commit ea14642edb0816912a856281944eb5e8a37c11ea"
author | jjohnson |
---|---|
date | Mon, 11 Oct 2021 01:47:22 +0000 |
parents | 005b200c8841 |
children | 25d207f7ff83 |
comparison
equal
deleted
inserted
replaced
5:005b200c8841 | 6:7253b367c082 |
---|---|
68 -k '$known_fusions' | 68 -k '$known_fusions' |
69 #end if | 69 #end if |
70 #if $tags | 70 #if $tags |
71 -t '$tags' | 71 -t '$tags' |
72 #end if | 72 #end if |
73 #if str($wgs.use_wgs) == "yes" | |
74 -d '$wgs.wgs' | |
75 #if $wgs.max_genomic_breakpoint_distance | |
76 -D $wgs.max_genomic_breakpoint_distance | |
77 #end if | |
78 #end if | |
73 -o fusions.tsv | 79 -o fusions.tsv |
74 #if $output_fusions_discarded | 80 #if $output_fusions_discarded |
75 -O fusions.discarded.tsv | 81 -O fusions.discarded.tsv |
76 #end if | 82 #end if |
83 ## Arriba options | |
84 #if $options.gtf_features | |
85 -G $options.gtf_features | |
86 #end if | |
87 #if $options.strandedness | |
88 -s $options.strandedness | |
89 #end if | |
90 #if $options.genome_contigs | |
91 -i $options.genome_contigs | |
92 #end if | |
93 #if $options.viral_contigs | |
94 -v $options.viral_contigs | |
95 #end if | |
96 #if $options.max_evalue | |
97 -E $options.max_evalue | |
98 #end if | |
99 #if $options.min_supporting_reads | |
100 -S $options.min_supporting_reads | |
101 #end if | |
102 #if $options.max_mismappers | |
103 -m $options.max_mismappers | |
104 #end if | |
105 #if $options.max_homolog_identity | |
106 -L $options.max_homolog_identity | |
107 #end if | |
108 #if $options.homopolymer_length | |
109 -H $options.homopolymer_length | |
110 #end if | |
111 #if $options.read_through_distance | |
112 -R $options.read_through_distance | |
113 #end if | |
114 #if $options.min_anchor_length | |
115 -A $options.min_anchor_length | |
116 #end if | |
117 #if $options.many_spliced_events | |
118 -M $options.many_spliced_events | |
119 #end if | |
120 #if $options.max_kmer_content | |
121 -m $options.max_kmer_content | |
122 #end if | |
123 #if $options.max_mismatch_pvalue | |
124 -V $options.max_mismatch_pvalue | |
125 #end if | |
126 #if $options.fragment_length | |
127 -F $options.fragment_length | |
128 #end if | |
129 #if $options.max_reads | |
130 -U $options.max_reads | |
131 #end if | |
132 #if $options.quantile | |
133 -Q $options.quantile | |
134 #end if | |
135 #if $options.exonic_fraction | |
136 -e $options.exonic_fraction | |
137 #end if | |
138 #if $options.top_n | |
139 -T $options.top_n | |
140 #end if | |
141 #if $options.covered_fraction | |
142 -C $options.covered_fraction | |
143 #end if | |
144 #if $options.max_itd_length | |
145 -l $options.max_itd_length | |
146 #end if | |
147 $options.duplicate_marking | |
148 $options.fill_discarded_columns | |
149 $options.fill_the_gaps | |
77 #if str($input_params.input_source) == "use_fastq" | 150 #if str($input_params.input_source) == "use_fastq" |
78 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam | 151 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam |
79 && samtools index Aligned.sortedByCoord.out.bam | 152 && samtools index Aligned.sortedByCoord.out.bam |
80 #elif str($visualization.do_viz) == "yes" | 153 #elif str($visualization.do_viz) == "yes" |
81 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam | 154 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam |
83 #end if | 156 #end if |
84 #if str($visualization.do_viz) == "yes" | 157 #if str($visualization.do_viz) == "yes" |
85 && draw_fusions.R | 158 && draw_fusions.R |
86 --fusions=fusions.tsv | 159 --fusions=fusions.tsv |
87 --alignments=Aligned.sortedByCoord.out.bam | 160 --alignments=Aligned.sortedByCoord.out.bam |
161 --annotation='$gtf' | |
88 --output=fusions.pdf | 162 --output=fusions.pdf |
89 --annotation='$gtf' | |
90 #if $visualization.cytobands | 163 #if $visualization.cytobands |
91 --cytobands='$visualization.cytobands' | 164 --cytobands='$visualization.cytobands' |
92 #end if | 165 #end if |
93 #if $protein_domains | 166 #if $protein_domains |
94 --proteinDomains='$protein_domains' | 167 --proteinDomains='$protein_domains' |
95 #end if | 168 #end if |
169 ## Visualization Options | |
170 #if $visualization.options.transcriptSelection | |
171 --transcriptSelection=$visualization.options.transcriptSelection | |
172 #end if | |
173 #if $visualization.options.minConfidenceForCircosPlot | |
174 --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot | |
175 #end if | |
176 #if $visualization.options.showIntergenicVicinity | |
177 --showIntergenicVicinity=$visualization.options.showIntergenicVicinity | |
178 #end if | |
179 #if $visualization.options.squishIntrons | |
180 --squishIntrons=$visualization.options.squishIntrons | |
181 #end if | |
182 #if $visualization.options.mergeDomainsOverlappingBy | |
183 --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy | |
184 #end if | |
185 #if $visualization.options.printExonLabels | |
186 --printExonLabels=$visualization.options.printExonLabels | |
187 #end if | |
188 #if $visualization.options.render3dEffect | |
189 --render3dEffect=$visualization.options.render3dEffect | |
190 #end if | |
191 #if $visualization.options.optimizeDomainColors | |
192 --optimizeDomainColors=$visualization.options.optimizeDomainColors | |
193 #end if | |
194 #if $visualization.options.color1 | |
195 --color1=$visualization.options.color1 | |
196 #end if | |
197 #if $visualization.options.color2 | |
198 --color2=$visualization.options.color2 | |
199 #end if | |
200 #if $visualization.options.pdfWidth | |
201 --pdfWidth=$visualization.options.pdfWidth | |
202 #end if | |
203 #if $visualization.options.pdfHeight | |
204 --pdfHeight=$visualization.options.pdfHeight | |
205 #end if | |
206 #if $visualization.options.fontSize | |
207 --fontSize=$visualization.options.fontSize | |
208 #end if | |
96 #end if | 209 #end if |
97 | |
98 ]]></command> | 210 ]]></command> |
99 <inputs> | 211 <inputs> |
100 <conditional name="input_params"> | 212 <conditional name="input_params"> |
101 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba run STAR"> | 213 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba run STAR"> |
102 <option value="use_star">Use output from earlier STAR</option> | 214 <option value="use_star">Use output from earlier STAR</option> |
137 <param name="known_fusions" argument="-k" type="data" format="tabular,tabular.gz" optional="true" label="File containing known fusions"> | 249 <param name="known_fusions" argument="-k" type="data" format="tabular,tabular.gz" optional="true" label="File containing known fusions"> |
138 <help><![CDATA[ file two TAB separated columns: five-prime region three-prime region ]]></help> | 250 <help><![CDATA[ file two TAB separated columns: five-prime region three-prime region ]]></help> |
139 </param> | 251 </param> |
140 <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion." | 252 <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion." |
141 help="This can be the known fusions if that input has a third column with a name"/> | 253 help="This can be the known fusions if that input has a third column with a name"/> |
142 <param name="output_fusions_discarded" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/> | 254 <conditional name="wgs"> |
255 <param name="use_wgs" type="select" label="Use whole-genome sequencing data"> | |
256 <option value="no">no</option> | |
257 <option value="yes">Yes</option> | |
258 </param> | |
259 <when value="yes"> | |
260 <param name="wgs" argument="-d" type="data" format="tabular" label="whole-genome sequencing structural variant data" | |
261 help="These coordinates serve to increase sensitivity towards weakly expressed fusions and to eliminate fusions with low evidence."/> | |
262 <param name="max_genomic_breakpoint_distance" argument="-D" type="integer" value="100000" label="Max genomic breakpoint distance" | |
263 help="determines how far a genomic breakpoint may be away from a transcriptomic breakpoint to consider it as a related event."/> | |
264 </when> | |
265 <when value="no"/> | |
266 </conditional> | |
267 <section name="options" expanded="false" title="Arriba Options"> | |
268 <param name="gtf_features" argument="-G" type="text" value="" optional="true" label="Names of features in the GTF annotation file"> | |
269 <help>Comma or SPACE separated list, default: gene_name=gene_name gene_id=gene_id transcript_id=transcript_id feature_exon=exon feature_CDS=CDS</help> | |
270 <validator type="regex" message="">^(gene_name|gene_id|transcript_id|feature_exon|feature_CDS)=[^ ,]+([ ,](gene_name|gene_id|transcript_id|feature_exon|feature_CDS)=[^ ,]+)?$</validator> | |
271 </param> | |
272 <param name="strandedness" argument="-s" type="select" optional="true" label="Whether a strand-specific protocol was used for library preparation"> | |
273 <help>When unstranded data is processed, the strand can sometimes be inferred from splice-patterns. But in unclear situations, stranded data helps resolve ambiguities.</help> | |
274 <option value="auto">auto</option> | |
275 <option value="yes">yes</option> | |
276 <option value="no">no</option> | |
277 <option value="reverse">reverse</option> | |
278 </param> | |
279 <param name="genome_contigs" argument="-i" type="text" value="" optional="true" label="Comma-/space-separated list of interesting contigs"> | |
280 <help>Comma-/space-separated list of interesting contigs. | |
281 Fusions between genes on other contigs are ignored. Contigs can be specified with or without the prefix "chr". | |
282 Asterisks (*) are treated as wild-cards. | |
283 Default: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y AC_* NC_* | |
284 </help> | |
285 </param> | |
286 <param name="viral_contigs" argument="-v" type="text" value="" optional="true" label="Comma-/space-separated list of viral contigs"> | |
287 <help>Comma-/space-separated list of viral contigs. | |
288 Asterisks (*) are treated as wild-cards. | |
289 Default: AC_* NC_* | |
290 </help> | |
291 </param> | |
292 <param name="max_evalue" argument="-E" type="float" value="" optional="true" label="Max e-value threshold"> | |
293 <help>Arriba estimates the number of fusions with a given number of supporting | |
294 reads which one would expect to see by random chance. If the expected number | |
295 of fusions (e-value) is higher than this threshold, the fusion is | |
296 discarded by the 'relative_support' filter. Note: Increasing this | |
297 threshold can dramatically increase the number of false positives and may | |
298 increase the runtime of resource-intensive steps. Fractional values are possible. | |
299 Default: 0.300000 | |
300 </help> | |
301 </param> | |
302 | |
303 <param name="min_supporting_reads" argument="-S" type="integer" value="" min="1" optional="true" label="Min supporting reads"> | |
304 <help>discard all fusions with fewer than this many supporting reads (split reads and discordant mates combined). | |
305 Default: 2 | |
306 </help> | |
307 </param> | |
308 <param name="max_mismappers" argument="-m" type="float" value="" min="0." max="1.0" optional="true" label="Max mismappers threshold"> | |
309 <help>When more than this fraction of supporting reads turns out to be mismappers, | |
310 the 'mismappers' filter discards the fusion. | |
311 Default: 0.800000 | |
312 </help> | |
313 </param> | |
314 <param name="max_homolog_identity" argument="-L" type="float" value="" min="0." max="1.0" optional="true" label="Max homologs identity threshold"> | |
315 <help>Genes with more than the given fraction of sequence identity are | |
316 considered homologs and removed by the 'homologs' filter. | |
317 Default: 0.300000 | |
318 </help> | |
319 </param> | |
320 <param name="homopolymer_length" argument="-H" type="integer" value="" min="1" optional="true" label="Homopolymer length"> | |
321 <help>The 'homopolymer' filter removes breakpoints adjacent to homopolymers of the given length or more. | |
322 Default: 6 | |
323 </help> | |
324 </param> | |
325 <param name="read_through_distance" argument="-R" type="integer" value="" min="1" optional="true" label="Read-through distance"> | |
326 <help>The 'read_through' filter removes read-through fusions | |
327 where the breakpoints are less than the given distance away from each other. | |
328 Default: 10000 | |
329 </help> | |
330 </param> | |
331 <param name="min_anchor_length" argument="-A" type="integer" value="" min="1" optional="true" label="Min anchor length"> | |
332 <help>Alignment artifacts are often characterized by split reads coming | |
333 from only one gene and no discordant mates. Moreover, the split | |
334 reads only align to a short stretch in one of the genes. The | |
335 'short_anchor' filter removes these fusions. This parameter sets | |
336 the threshold in bp for what the filter considers short. | |
337 Default: 23 | |
338 </help> | |
339 </param> | |
340 <param name="many_spliced_events" argument="-M" type="integer" value="" min="1" optional="true" label="Many spliced events"> | |
341 <help>The 'many_spliced' filter recovers fusions between genes that | |
342 have at least this many spliced breakpoints. | |
343 Default: 4 | |
344 </help> | |
345 </param> | |
346 <param name="max_kmer_content" argument="-m" type="float" value="" min="0." max="1.0" optional="true" label="Max kmer content"> | |
347 <help>The 'low_entropy' filter removes reads with repetitive 3-mers. If | |
348 the 3-mers make up more than the given fraction of the sequence, then | |
349 the read is discarded. | |
350 Default: 0.600000 | |
351 </help> | |
352 </param> | |
353 | |
354 <param name="max_mismatch_pvalue" argument="-V" type="float" value="" optional="true" label="Max mismatch p-value threshold"> | |
355 <help>The 'mismatches' filter uses a binomial model to calculate a | |
356 p-value for observing a given number of mismatches in a read. | |
357 If the number of mismatches is too high, the read is discarded. | |
358 Default: 0.010000 | |
359 </help> | |
360 </param> | |
361 | |
362 <param name="fragment_length" argument="-F" type="integer" value="" min="1" optional="true" label="Single-end fragment length"> | |
363 <help>When paired-end data is given, the fragment length is estimated | |
364 automatically and this parameter has no effect. But when single-end | |
365 data is given, the mean fragment length should be specified to | |
366 effectively filter fusions that arise from hairpin structures. | |
367 Default: 200 | |
368 </help> | |
369 </param> | |
370 <param name="max_reads" argument="-U" type="integer" value="" min="1" optional="true" label="Max reads"> | |
371 <help>Subsample fusions with more than the given number of supporting reads. This | |
372 improves performance without compromising sensitivity, as long as the | |
373 threshold is high. Counting of supporting reads beyond the threshold is | |
374 inaccurate, obviously. | |
375 Default: 300 | |
376 </help> | |
377 </param> | |
378 <param name="quantile" argument="-Q" type="float" value="" min="0." max="1.0" optional="true" label="Quantile"> | |
379 <help>Highly expressed genes are prone to produce artifacts during library preparation. | |
380 Genes with an expression above the given quantile are eligible for filtering by the 'in_vitro' filter. | |
381 Default: 0.998000 | |
382 </help> | |
383 </param> | |
384 <param name="exonic_fraction" argument="-e" type="float" value="" min="0." max="1.0" optional="true" label="Exonic fraction"> | |
385 <help>The breakpoints of false-positive predictions of intragenic events | |
386 are often both in exons. True predictions are more likely to have at | |
387 least one breakpoint in an intron, because introns are larger. | |
388 If the fraction of exonic sequence between two breakpoints is smaller than | |
389 the given fraction, the 'intragenic_exonic' filter discards the event. | |
390 Default: 0.330000 | |
391 </help> | |
392 </param> | |
393 | |
394 <param name="top_n" argument="-T" type="integer" value="" min="1" optional="true" label="top N viral contigs"> | |
395 <help>Only report viral integration sites of the top N most highly expressed viral contigs. | |
396 Default: 5 | |
397 </help> | |
398 </param> | |
399 <param name="covered_fraction" argument="-C" type="float" value="" min="0." max="1.0" optional="true" label="Covered fraction"> | |
400 <help>Ignore virally associated events if the virus is not fully expressed, | |
401 i.e., less than the given fraction of the viral contig is transcribed. | |
402 Default: 0.150000 | |
403 </help> | |
404 </param> | |
405 <param name="max_itd_length" argument="-l" type="integer" value="" min="1" optional="true" label="Maximum length of internal tandem duplications"> | |
406 <help>Note: Increasing this value beyond the default can impair performance and lead to many false positives. | |
407 Default: 100 | |
408 </help> | |
409 </param> | |
410 <param name="duplicate_marking" argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Use aligner's duplicate marking"> | |
411 <help>Instead of performing duplicate marking itself, Arriba relies on duplicate marking by a | |
412 preceding program using the BAM_FDUP flag. This makes sense when unique molecular | |
413 identifiers (UMI) are used. | |
414 </help> | |
415 </param> | |
416 <param name="fill_discarded_columns" argument="-X" type="boolean" truevalue="-X" falsevalue="" checked="false" label="Fill all fusions.discarded.tsv columns"> | |
417 <help>To reduce the runtime and file size, by default, the columns 'fusion_transcript', | |
418 'peptide_sequence', and 'read_identifiers' are left empty in the file containing | |
419 discarded fusion candidates (see parameter -O). When this flag is set, this extra | |
420 information is reported in the discarded fusions file. | |
421 </help> | |
422 </param> | |
423 <param name="fill_the_gaps" argument="-I" type="boolean" truevalue="-I" falsevalue="" checked="false" label="Fill fusion transcript gaps from the assembly"> | |
424 <help>If assembly of the fusion transcript sequence from the supporting reads is incomplete | |
425 (denoted as '...'), fill the gaps using the assembly sequence wherever possible. | |
426 </help> | |
427 </param> | |
428 </section> | |
429 <param name="output_fusions_discarded" argument="-O" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/> | |
143 <conditional name="visualization"> | 430 <conditional name="visualization"> |
144 <param name="do_viz" type="select" label="Generate visualization"> | 431 <param name="do_viz" type="select" label="Generate visualization"> |
145 <option value="yes">Yes</option> | 432 <option value="yes">Yes</option> |
146 <option value="no">no</option> | 433 <option value="no">no</option> |
147 </param> | 434 </param> |
148 <when value="yes"> | 435 <when value="yes"> |
149 <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/> | 436 <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/> |
437 <section name="options" expanded="false" title="Visualization Options"> | |
438 <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection"> | |
439 <help>By default the transcript isoform with the highest coverage is drawn. | |
440 Alternatively, the transcript isoform that is provided in the columns | |
441 transcript_id1 and transcript_id2 in the given fusions file can be drawn. | |
442 Selecting the isoform with the highest coverage usually produces nicer plots, | |
443 in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint. | |
444 However, the isoform with the highest coverage may not be the one that is involved in the fusion. | |
445 Often, genomic rearrangements lead to non-canonical isoforms being transcribed. | |
446 For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2, | |
447 which reflect the actual isoforms involved in a fusion. | |
448 As a third option, the transcripts that are annotated as canonical can be drawn. | |
449 Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical. | |
450 </help> | |
451 <option value="coverage">coverage</option> | |
452 <option value="provided">provided</option> | |
453 <option value="canonical">canonical</option> | |
454 </param> | |
455 <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot"> | |
456 <help>The fusion of interest is drawn as a solid line in the circos plot. | |
457 To give an impression of the overall degree of rearrangement, | |
458 all other fusions are drawn as semi-transparent lines in the background. | |
459 This option determines which other fusions should be included in the circos plot. | |
460 Values specify the minimum confidence a fusion must have to be included. | |
461 It usually makes no sense to include low-confidence fusions in circos plots, | |
462 because they are abundant and unreliable, and would clutter up the circos plot. | |
463 Default: medium | |
464 </help> | |
465 <option value="none">none - only the fusion of interest is drawn</option> | |
466 <option value="low">low</option> | |
467 <option value="medium">medium</option> | |
468 <option value="high">high</option> | |
469 </param> | |
470 <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity"> | |
471 <help>This option only applies to intergenic breakpoints. | |
472 If it is set to a value greater than 0, then the script draws the genes | |
473 which are no more than the given distance away from an intergenic breakpoint. | |
474 Note that this option is incompatible with squishIntrons. | |
475 Default: 0 | |
476 </help> | |
477 </param> | |
478 <param argument="--squishIntrons" type="select" optional="true" label="Squish introns"> | |
479 <help>Exons usually make up only a small fraction of a gene. | |
480 They may be hard to see in the plot. | |
481 Since introns are in most situations of no interest in the context of gene fusions, | |
482 this switch can be used to shrink the size of introns to a fixed, negligible size. | |
483 It makes sense to disable this feature, if breakpoints in introns are of importance. | |
484 Default: TRUE | |
485 </help> | |
486 <option value="TRUE">True</option> | |
487 <option value="FALSE">False</option> | |
488 </param> | |
489 | |
490 <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By"> | |
491 <help>Occasionally, domains are annotated redundantly. | |
492 For example, tyrosine kinase domains are frequently annotated as | |
493 Protein tyrosine kinase and Protein kinase domain. | |
494 In order to simplify the visualization, such domains can be merged into one, | |
495 given that they overlap by the given fraction. | |
496 The description of the larger domain is used. | |
497 Default: 0.9 | |
498 </help> | |
499 </param> | |
500 <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels"> | |
501 <help>By default the number of an exon is printed inside each exon, | |
502 which is taken from the attribute exon_number of the GTF annotation. | |
503 When a gene has many exons, the boxes may be too narrow to contain the labels, | |
504 resulting in unreadable exon labels. In these situations, | |
505 it may be better to turn off exon labels. | |
506 Default: TRUE | |
507 </help> | |
508 <option value="TRUE">True</option> | |
509 <option value="FALSE">False</option> | |
510 </param> | |
511 <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect"> | |
512 <help>Whether light and shadow should be rendered to give objects a 3D effect. | |
513 Default: TRUE | |
514 </help> | |
515 <option value="TRUE">True</option> | |
516 <option value="FALSE">False</option> | |
517 </param> | |
518 <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors"> | |
519 <help>By default, the script colorizes domains according to the colors | |
520 specified in the file given in --annotation. | |
521 This way, coloring of domains is consistent across all proteins. | |
522 But since there are more distinct domains than colors, | |
523 this can lead to different domains having the same color. | |
524 If this option is set to TRUE, the colors are recomputed for each fusion separately. | |
525 This ensures that the colors have the maximum distance for each individual fusion, | |
526 but they are no longer consistent across different fusions. | |
527 Default: FALSE | |
528 </help> | |
529 <option value="TRUE">True</option> | |
530 <option value="FALSE">False</option> | |
531 </param> | |
532 <param argument="--color1" type="color" value="" optional="true" label="Color of the 5' end of the fusion."/> | |
533 <param argument="--color2" type="color" value="" optional="true" label="Color of the 3' end of the fusion."/> | |
534 <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches" | |
535 help="Default: 11.692"/> | |
536 <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches" | |
537 help="Default: 8.267"/> | |
538 <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text" | |
539 help="Default: 1.0"/> | |
540 </section> | |
541 | |
150 </when> | 542 </when> |
151 <when value="no"/> | 543 <when value="no"/> |
152 </conditional> | 544 </conditional> |
545 | |
153 </inputs> | 546 </inputs> |
154 <outputs> | 547 <outputs> |
155 <data name="fusions" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/> | 548 <data name="fusions" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/> |
156 <data name="discarded" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv"> | 549 <data name="discarded" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv"> |
157 <filter> output_fusions_discarded == "yes"</filter> | 550 <filter> output_fusions_discarded == "yes"</filter> |
201 </output> | 594 </output> |
202 </test> | 595 </test> |
203 | 596 |
204 </tests> | 597 </tests> |
205 <help><![CDATA[ | 598 <help><![CDATA[ |
206 ** Arriba ** | 599 **Arriba** |
207 | 600 |
208 | 601 |
209 Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions. | 602 Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions. |
210 It is based on chimeric alignments found by the STAR RNA-Seq aligner. | 603 It is based on chimeric alignments found by the STAR RNA-Seq aligner. |
211 | 604 |
212 | 605 |
213 ** INPUTS_ ** | 606 **INPUTS** |
214 | 607 |
608 See: https://arriba.readthedocs.io/en/latest/input-files/ | |
215 | 609 |
216 - Alignments | 610 - Alignments |
217 | 611 |
218 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam. | 612 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam. |
219 | 613 |
350 Arriba checks if the orientation of the structural variant matches that of a fusion detected in the RNA-Seq data. If, for example, Arriba predicts the 5' end of a gene to be retained in a fusion, then a structural variant is expected to confirm this, or else the variant is not considered to be related. | 744 Arriba checks if the orientation of the structural variant matches that of a fusion detected in the RNA-Seq data. If, for example, Arriba predicts the 5' end of a gene to be retained in a fusion, then a structural variant is expected to confirm this, or else the variant is not considered to be related. |
351 | 745 |
352 NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba. | 746 NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba. |
353 | 747 |
354 | 748 |
355 ** OUTPUTS_ ** | 749 **OUTPUTS** |
750 | |
751 See: https://arriba.readthedocs.io/en/latest/output-files/ | |
356 | 752 |
357 - fusions.tsv | 753 - fusions.tsv |
358 | 754 |
359 The file fusions.tsv (as specified by the parameter -o) contains fusions which pass all of Arriba's filters. It should be highly enriched for true predictions. The predictions are listed from highest to lowest confidence. The following paragraphs describe the columns in detail: | 755 The file fusions.tsv (as specified by the parameter -o) contains fusions which pass all of Arriba's filters. It should be highly enriched for true predictions. The predictions are listed from highest to lowest confidence. The following paragraphs describe the columns in detail: |
360 | 756 |
401 - fusions.discarded.tsv | 797 - fusions.discarded.tsv |
402 | 798 |
403 The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv. | 799 The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv. |
404 | 800 |
405 | 801 |
406 ** VISUALIZATION_ ** | 802 **VISUALIZATION** |
803 | |
804 See: https://arriba.readthedocs.io/en/latest/visualization/ | |
805 | |
407 - fusions.pdf | 806 - fusions.pdf |
408 | 807 |
409 A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint. | 808 A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint. |
410 | 809 |
411 | 810 |
412 Code repository: https://github.com/suhrig/arriba | 811 **OPTIONS** |
413 Get help/report bugs: https://github.com/suhrig/arriba/issues | 812 |
414 User manual: https://arriba.readthedocs.io/ | 813 - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba |
415 Please cite: https://doi.org/10.1101/gr.257246.119 | 814 - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr |
815 - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/ | |
416 | 816 |
417 | 817 |
418 .. _Arriba: https://arriba.readthedocs.io/en/latest/ | 818 .. _Arriba: https://arriba.readthedocs.io/en/latest/ |
419 .. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/ | 819 .. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/ |
420 .. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/ | 820 .. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/ |
421 .. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/ | 821 .. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/ |
822 .. _OPTIONS: https://arriba.readthedocs.io/en/latest/command-line-options/ | |
422 | 823 |
423 ]]></help> | 824 ]]></help> |
424 <expand macro="citations" /> | 825 <expand macro="citations" /> |
425 </tool> | 826 </tool> |