comparison arriba.xml @ 6:7253b367c082 draft

"planemo upload for repository https://github.com/jj-umn/tools-iuc/tree/arriba/tools/arriba commit ea14642edb0816912a856281944eb5e8a37c11ea"
author jjohnson
date Mon, 11 Oct 2021 01:47:22 +0000
parents 005b200c8841
children 25d207f7ff83
comparison
equal deleted inserted replaced
5:005b200c8841 6:7253b367c082
68 -k '$known_fusions' 68 -k '$known_fusions'
69 #end if 69 #end if
70 #if $tags 70 #if $tags
71 -t '$tags' 71 -t '$tags'
72 #end if 72 #end if
73 #if str($wgs.use_wgs) == "yes"
74 -d '$wgs.wgs'
75 #if $wgs.max_genomic_breakpoint_distance
76 -D $wgs.max_genomic_breakpoint_distance
77 #end if
78 #end if
73 -o fusions.tsv 79 -o fusions.tsv
74 #if $output_fusions_discarded 80 #if $output_fusions_discarded
75 -O fusions.discarded.tsv 81 -O fusions.discarded.tsv
76 #end if 82 #end if
83 ## Arriba options
84 #if $options.gtf_features
85 -G '$options.gtf_features'
86 #end if
87 #if $options.strandedness
88 -s $options.strandedness
89 #end if
90 #if $options.genome_contigs
91 -i '$options.genome_contigs'
92 #end if
93 #if $options.viral_contigs
94 -v '$options.viral_contigs'
95 #end if
96 #if $options.max_evalue
97 -E $options.max_evalue
98 #end if
99 #if $options.min_supporting_reads
100 -S $options.min_supporting_reads
101 #end if
102 #if $options.max_mismappers
103 -m $options.max_mismappers
104 #end if
105 #if $options.max_homolog_identity
106 -L $options.max_homolog_identity
107 #end if
108 #if $options.homopolymer_length
109 -H $options.homopolymer_length
110 #end if
111 #if $options.read_through_distance
112 -R $options.read_through_distance
113 #end if
114 #if $options.min_anchor_length
115 -A $options.min_anchor_length
116 #end if
117 #if $options.many_spliced_events
118 -M $options.many_spliced_events
119 #end if
120 #if $options.max_kmer_content
121 -K $options.max_kmer_content
122 #end if
123 #if $options.max_mismatch_pvalue
124 -V $options.max_mismatch_pvalue
125 #end if
126 #if $options.fragment_length
127 -F $options.fragment_length
128 #end if
129 #if $options.max_reads
130 -U $options.max_reads
131 #end if
132 #if $options.quantile
133 -Q $options.quantile
134 #end if
135 #if $options.exonic_fraction
136 -e $options.exonic_fraction
137 #end if
138 #if $options.top_n
139 -T $options.top_n
140 #end if
141 #if $options.covered_fraction
142 -C $options.covered_fraction
143 #end if
144 #if $options.max_itd_length
145 -l $options.max_itd_length
146 #end if
147 $options.duplicate_marking
148 $options.fill_discarded_columns
149 $options.fill_the_gaps
77 #if str($input_params.input_source) == "use_fastq" 150 #if str($input_params.input_source) == "use_fastq"
78 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam 151 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam Aligned.out.bam > Aligned.sortedByCoord.out.bam
79 && samtools index Aligned.sortedByCoord.out.bam 152 && samtools index Aligned.sortedByCoord.out.bam
80 #elif str($visualization.do_viz) == "yes" 153 #elif str($visualization.do_viz) == "yes"
81 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam 154 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam
83 #end if 156 #end if
84 #if str($visualization.do_viz) == "yes" 157 #if str($visualization.do_viz) == "yes"
85 && draw_fusions.R 158 && draw_fusions.R
86 --fusions=fusions.tsv 159 --fusions=fusions.tsv
87 --alignments=Aligned.sortedByCoord.out.bam 160 --alignments=Aligned.sortedByCoord.out.bam
161 --annotation='$gtf'
88 --output=fusions.pdf 162 --output=fusions.pdf
89 --annotation='$gtf'
90 #if $visualization.cytobands 163 #if $visualization.cytobands
91 --cytobands='$visualization.cytobands' 164 --cytobands='$visualization.cytobands'
92 #end if 165 #end if
93 #if $protein_domains 166 #if $protein_domains
94 --proteinDomains='$protein_domains' 167 --proteinDomains='$protein_domains'
95 #end if 168 #end if
169 ## Visualization Options
170 #if $visualization.options.transcriptSelection
171 --transcriptSelection=$visualization.options.transcriptSelection
172 #end if
173 #if $visualization.options.minConfidenceForCircosPlot
174 --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
175 #end if
176 #if $visualization.options.showIntergenicVicinity
177 --showIntergenicVicinity=$visualization.options.showIntergenicVicinity
178 #end if
179 #if $visualization.options.squishIntrons
180 --squishIntrons=$visualization.options.squishIntrons
181 #end if
182 #if $visualization.options.mergeDomainsOverlappingBy
183 --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
184 #end if
185 #if $visualization.options.printExonLabels
186 --printExonLabels=$visualization.options.printExonLabels
187 #end if
188 #if $visualization.options.render3dEffect
189 --render3dEffect=$visualization.options.render3dEffect
190 #end if
191 #if $visualization.options.optimizeDomainColors
192 --optimizeDomainColors=$visualization.options.optimizeDomainColors
193 #end if
194 #if $visualization.options.color1
195 --color1=$visualization.options.color1
196 #end if
197 #if $visualization.options.color2
198 --color2=$visualization.options.color2
199 #end if
200 #if $visualization.options.pdfWidth
201 --pdfWidth=$visualization.options.pdfWidth
202 #end if
203 #if $visualization.options.pdfHeight
204 --pdfHeight=$visualization.options.pdfHeight
205 #end if
206 #if $visualization.options.fontSize
207 --fontSize=$visualization.options.fontSize
208 #end if
96 #end if 209 #end if
97
98 ]]></command> 210 ]]></command>
99 <inputs> 211 <inputs>
100 <conditional name="input_params"> 212 <conditional name="input_params">
101 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba running STAR"> 213 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba running STAR">
102 <option value="use_star">Use output from earlier STAR</option> 214 <option value="use_star">Use output from earlier STAR</option>
137 <param name="known_fusions" argument="-k" type="data" format="tabular,tabular.gz" optional="true" label="File containing known fusions"> 249 <param name="known_fusions" argument="-k" type="data" format="tabular,tabular.gz" optional="true" label="File containing known fusions">
138 <help><![CDATA[ file two TAB separated columns: five-prime region three-prime region ]]></help> 250 <help><![CDATA[ file two TAB separated columns: five-prime region three-prime region ]]></help>
139 </param> 251 </param>
140 <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion." 252 <param name="tags" argument="-t" type="data" format="tabular" optional="true" label="File containing tag names for a fusion."
141 help="This can be the known fusions if that input has a third column with a name"/> 253 help="This can be the known fusions if that input has a third column with a name"/>
142 <param name="output_fusions_discarded" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/> 254 <conditional name="wgs">
255 <param name="use_wgs" type="select" label="Use whole-genome sequencing data">
256 <option value="no">no</option>
257 <option value="yes">Yes</option>
258 </param>
259 <when value="yes">
260 <param name="wgs" argument="-d" type="data" format="tabular" label="whole-genome sequencing structural variant data"
261 help="These coordinates serve to increase sensitivity towards weakly expressed fusions and to eliminate fusions with low evidence."/>
262 <param name="max_genomic_breakpoint_distance" argument="-D" type="integer" value="100000" label="Max genomic breakpoint distance"
263 help="determines how far a genomic breakpoint may be away from a transcriptomic breakpoint to consider it as a related event."/>
264 </when>
265 <when value="no"/>
266 </conditional>
267 <section name="options" expanded="false" title="Arriba Options">
268 <param name="gtf_features" argument="-G" type="text" value="" optional="true" label="Names of features in the GTF annotation file">
269 <help>Comma or SPACE separated list, default: gene_name=gene_name gene_id=gene_id transcript_id=transcript_id feature_exon=exon feature_CDS=CDS</help>
270 <validator type="regex" message="Expected a comma or space separated list of key=value pairs; allowed keys: gene_name, gene_id, transcript_id, feature_exon, feature_CDS">^(gene_name|gene_id|transcript_id|feature_exon|feature_CDS)=[^ ,]+([ ,](gene_name|gene_id|transcript_id|feature_exon|feature_CDS)=[^ ,]+)*$</validator>
271 </param>
272 <param name="strandedness" argument="-s" type="select" optional="true" label="Whether a strand-specific protocol was used for library preparation">
273 <help>When unstranded data is processed, the strand can sometimes be inferred from splice-patterns. But in unclear situations, stranded data helps resolve ambiguities.</help>
274 <option value="auto">auto</option>
275 <option value="yes">yes</option>
276 <option value="no">no</option>
277 <option value="reverse">reverse</option>
278 </param>
279 <param name="genome_contigs" argument="-i" type="text" value="" optional="true" label="Comma-/space-separated list of interesting contigs">
280 <help>Comma-/space-separated list of interesting contigs.
281 Fusions between genes on other contigs are ignored. Contigs can be specified with or without the prefix "chr".
282 Asterisks (*) are treated as wild-cards.
283 Default: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y AC_* NC_*
284 </help>
285 </param>
286 <param name="viral_contigs" argument="-v" type="text" value="" optional="true" label="Comma-/space-separated list of viral contigs">
287 <help>Comma-/space-separated list of viral contigs.
288 Asterisks (*) are treated as wild-cards.
289 Default: AC_* NC_*
290 </help>
291 </param>
292 <param name="max_evalue" argument="-E" type="float" value="" optional="true" label="Max e-value threshold">
293 <help>Arriba estimates the number of fusions with a given number of supporting
294 reads which one would expect to see by random chance. If the expected number
295 of fusions (e-value) is higher than this threshold, the fusion is
296 discarded by the 'relative_support' filter. Note: Increasing this
297 threshold can dramatically increase the number of false positives and may
298 increase the runtime of resource-intensive steps. Fractional values are possible.
299 Default: 0.300000
300 </help>
301 </param>
302
303 <param name="min_supporting_reads" argument="-S" type="integer" value="" min="1" optional="true" label="Min supporting reads">
304 <help>discard all fusions with fewer than this many supporting reads (split reads and discordant mates combined).
305 Default: 2
306 </help>
307 </param>
308 <param name="max_mismappers" argument="-m" type="float" value="" min="0." max="1.0" optional="true" label="Max mismappers threshold">
309 <help>When more than this fraction of supporting reads turns out to be mismappers,
310 the 'mismappers' filter discards the fusion.
311 Default: 0.800000
312 </help>
313 </param>
314 <param name="max_homolog_identity" argument="-L" type="float" value="" min="0." max="1.0" optional="true" label="Max homologs identity threshold">
315 <help>Genes with more than the given fraction of sequence identity are
316 considered homologs and removed by the 'homologs' filter.
317 Default: 0.300000
318 </help>
319 </param>
320 <param name="homopolymer_length" argument="-H" type="integer" value="" min="1" optional="true" label="Homopolymer length">
321 <help>The 'homopolymer' filter removes breakpoints adjacent to homopolymers of the given length or more.
322 Default: 6
323 </help>
324 </param>
325 <param name="read_through_distance" argument="-R" type="integer" value="" min="1" optional="true" label="Read-through distance">
326 <help>The 'read_through' filter removes read-through fusions
327 where the breakpoints are less than the given distance away from each other.
328 Default: 10000
329 </help>
330 </param>
331 <param name="min_anchor_length" argument="-A" type="integer" value="" min="1" optional="true" label="Min anchor length">
332 <help>Alignment artifacts are often characterized by split reads coming
333 from only one gene and no discordant mates. Moreover, the split
334 reads only align to a short stretch in one of the genes. The
335 'short_anchor' filter removes these fusions. This parameter sets
336 the threshold in bp for what the filter considers short.
337 Default: 23
338 </help>
339 </param>
340 <param name="many_spliced_events" argument="-M" type="integer" value="" min="1" optional="true" label="Many spliced events">
341 <help>The 'many_spliced' filter recovers fusions between genes that
342 have at least this many spliced breakpoints.
343 Default: 4
344 </help>
345 </param>
346 <param name="max_kmer_content" argument="-K" type="float" value="" min="0." max="1.0" optional="true" label="Max kmer content">
347 <help>The 'low_entropy' filter removes reads with repetitive 3-mers. If
348 the 3-mers make up more than the given fraction of the sequence, then
349 the read is discarded.
350 Default: 0.600000
351 </help>
352 </param>
353
354 <param name="max_mismatch_pvalue" argument="-V" type="float" value="" optional="true" label="Max mismatch p-value threshold">
355 <help>The 'mismatches' filter uses a binomial model to calculate a
356 p-value for observing a given number of mismatches in a read.
357 If the number of mismatches is too high, the read is discarded.
358 Default: 0.010000
359 </help>
360 </param>
361
362 <param name="fragment_length" argument="-F" type="integer" value="" min="1" optional="true" label="Single-end fragment length">
363 <help>When paired-end data is given, the fragment length is estimated
364 automatically and this parameter has no effect. But when single-end
365 data is given, the mean fragment length should be specified to
366 effectively filter fusions that arise from hairpin structures.
367 Default: 200
368 </help>
369 </param>
370 <param name="max_reads" argument="-U" type="integer" value="" min="1" optional="true" label="Max reads">
371 <help>Subsample fusions with more than the given number of supporting reads. This
372 improves performance without compromising sensitivity, as long as the
373 threshold is high. Counting of supporting reads beyond the threshold is
374 inaccurate, obviously.
375 Default: 300
376 </help>
377 </param>
378 <param name="quantile" argument="-Q" type="float" value="" min="0." max="1.0" optional="true" label="Quantile">
379 <help>Highly expressed genes are prone to produce artifacts during library preparation.
380 Genes with an expression above the given quantile are eligible for filtering by the 'in_vitro' filter.
381 Default: 0.998000
382 </help>
383 </param>
384 <param name="exonic_fraction" argument="-e" type="float" value="" min="0." max="1.0" optional="true" label="Exonic fraction">
385 <help>The breakpoints of false-positive predictions of intragenic events
386 are often both in exons. True predictions are more likely to have at
387 least one breakpoint in an intron, because introns are larger.
388 If the fraction of exonic sequence between two breakpoints is smaller than
389 the given fraction, the 'intragenic_exonic' filter discards the event.
390 Default: 0.330000
391 </help>
392 </param>
393
394 <param name="top_n" argument="-T" type="integer" value="" min="1" optional="true" label="top N viral contigs">
395 <help>Only report viral integration sites of the top N most highly expressed viral contigs.
396 Default: 5
397 </help>
398 </param>
399 <param name="covered_fraction" argument="-C" type="float" value="" min="0." max="1.0" optional="true" label="Covered fraction">
400 <help>Ignore virally associated events if the virus is not fully expressed,
401 i.e., less than the given fraction of the viral contig is transcribed.
402 Default: 0.150000
403 </help>
404 </param>
405 <param name="max_itd_length" argument="-l" type="integer" value="" min="1" optional="true" label="Maximum length of internal tandem duplications">
406 <help>Note: Increasing this value beyond the default can impair performance and lead to many false positives.
407 Default: 100
408 </help>
409 </param>
410 <param name="duplicate_marking" argument="-u" type="boolean" truevalue="-u" falsevalue="" checked="false" label="Use aligners duplicate marking">
411 <help>Instead of performing duplicate marking itself, Arriba relies on duplicate marking by a
412 preceding program using the BAM_FDUP flag. This makes sense when unique molecular
413 identifiers (UMI) are used.
414 </help>
415 </param>
416 <param name="fill_discarded_columns" argument="-X" type="boolean" truevalue="-X" falsevalue="" checked="false" label="Fill all fusion.discarded.tsv columns">
417 <help>To reduce the runtime and file size, by default, the columns 'fusion_transcript',
418 'peptide_sequence', and 'read_identifiers' are left empty in the file containing
419 discarded fusion candidates (see parameter -O). When this flag is set, this extra
420 information is reported in the discarded fusions file.
421 </help>
422 </param>
423 <param name="fill_the_gaps" argument="-I" type="boolean" truevalue="-I" falsevalue="" checked="false" label="Fill fusion transcript gaps from the assembly">
424 <help>If assembly of the fusion transcript sequence from the supporting reads is incomplete
425 (denoted as '...'), fill the gaps using the assembly sequence wherever possible.
426 </help>
427 </param>
428 </section>
429 <param name="output_fusions_discarded" argument="-O" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/>
143 <conditional name="visualization"> 430 <conditional name="visualization">
144 <param name="do_viz" type="select" label="Generate visualization"> 431 <param name="do_viz" type="select" label="Generate visualization">
145 <option value="yes">Yes</option> 432 <option value="yes">Yes</option>
146 <option value="no">no</option> 433 <option value="no">no</option>
147 </param> 434 </param>
148 <when value="yes"> 435 <when value="yes">
149 <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/> 436 <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
437 <section name="options" expanded="false" title="Visualization Options">
438 <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
439 <help>By default the transcript isoform with the highest coverage is drawn.
440 Alternatively, the transcript isoform that is provided in the columns
441 transcript_id1 and transcript_id2 in the given fusions file can be drawn.
442 Selecting the isoform with the highest coverage usually produces nicer plots,
443 in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
444 However, the isoform with the highest coverage may not be the one that is involved in the fusion.
445 Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
446 For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
447 which reflect the actual isoforms involved in a fusion.
448 As a third option, the transcripts that are annotated as canonical can be drawn.
449 Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
450 </help>
451 <option value="coverage">coverage</option>
452 <option value="provided">provided</option>
453 <option value="canonical">canonical</option>
454 </param>
455 <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
456 <help>The fusion of interest is drawn as a solid line in the circos plot.
457 To give an impression of the overall degree of rearrangement,
458 all other fusions are drawn as semi-transparent lines in the background.
459 This option determines which other fusions should be included in the circos plot.
460 Values specify the minimum confidence a fusion must have to be included.
461 It usually makes no sense to include low-confidence fusions in circos plots,
462 because they are abundant and unreliable, and would clutter up the circos plot.
463 Default: medium
464 </help>
465 <option value="none">none - only the fusion of interest is drawn</option>
466 <option value="low">low</option>
467 <option value="medium">medium</option>
468 <option value="high">high</option>
469 </param>
470 <param argument="--showIntergenicVicinity" type="integer" value="" min="0" optional="true" label="Intergenic Vicinity">
471 <help>This option only applies to intergenic breakpoints.
472 If it is set to a value greater than 0, then the script draws the genes
473 which are no more than the given distance away from an intergenic breakpoint.
474 Note that this option is incompatible with squishIntrons.
475 Default: 0
476 </help>
477 </param>
478 <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
479 <help>Exons usually make up only a small fraction of a gene.
480 They may be hard to see in the plot.
481 Since introns are in most situations of no interest in the context of gene fusions,
482 this switch can be used to shrink the size of introns to a fixed, negligible size.
483 It makes sense to disable this feature, if breakpoints in introns are of importance.
484 Default: TRUE
485 </help>
486 <option value="TRUE">True</option>
487 <option value="FALSE">False</option>
488 </param>
489
490 <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
491 <help>Occasionally, domains are annotated redundantly.
492 For example, tyrosine kinase domains are frequently annotated as
493 Protein tyrosine kinase and Protein kinase domain.
494 In order to simplify the visualization, such domains can be merged into one,
495 given that they overlap by the given fraction.
496 The description of the larger domain is used.
497 Default: 0.9
498 </help>
499 </param>
500 <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
501 <help>By default the number of an exon is printed inside each exon,
502 which is taken from the attribute exon_number of the GTF annotation.
503 When a gene has many exons, the boxes may be too narrow to contain the labels,
504 resulting in unreadable exon labels. In these situations,
505 it may be better to turn off exon labels.
506 Default: TRUE
507 </help>
508 <option value="TRUE">True</option>
509 <option value="FALSE">False</option>
510 </param>
511 <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
512 <help>Whether light and shadow should be rendered to give objects a 3D effect.
513 Default: TRUE
514 </help>
515 <option value="TRUE">True</option>
516 <option value="FALSE">False</option>
517 </param>
518 <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
519 <help>By default, the script colorizes domains according to the colors
520 specified in the file given in --annotation.
521 This way, coloring of domains is consistent across all proteins.
522 But since there are more distinct domains than colors,
523 this can lead to different domains having the same color.
524 If this option is set to TRUE, the colors are recomputed for each fusion separately.
525 This ensures that the colors have the maximum distance for each individual fusion,
526 but they are no longer consistent across different fusions.
527 Default: FALSE
528 </help>
529 <option value="TRUE">True</option>
530 <option value="FALSE">False</option>
531 </param>
532 <param argument="--color1" type="color" value="" optional="true" label="Color of the 5' end of the fusion."/>
533 <param argument="--color2" type="color" value="" optional="true" label="Color of the 3' end of the fusion."/>
534 <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
535 help="Default: 11.692"/>
536 <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
537 help="Default: 8.267"/>
538 <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
539 help="Default: 1.0"/>
540 </section>
541
150 </when> 542 </when>
151 <when value="no"/> 543 <when value="no"/>
152 </conditional> 544 </conditional>
545
153 </inputs> 546 </inputs>
154 <outputs> 547 <outputs>
155 <data name="fusions" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/> 548 <data name="fusions" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/>
156 <data name="discarded" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv"> 549 <data name="discarded" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv">
157 <filter> output_fusions_discarded == "yes"</filter> 550 <filter> output_fusions_discarded == "yes"</filter>
201 </output> 594 </output>
202 </test> 595 </test>
203 596
204 </tests> 597 </tests>
205 <help><![CDATA[ 598 <help><![CDATA[
206 ** Arriba ** 599 **Arriba**
207 600
208 601
209 Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions. 602 Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
210 It is based on chimeric alignments found by the STAR RNA-Seq aligner. 603 It is based on chimeric alignments found by the STAR RNA-Seq aligner.
211 604
212 605
213 ** INPUTS_ ** 606 **INPUTS**
214 607
608 See: https://arriba.readthedocs.io/en/latest/input-files/
215 609
216 - Alignments 610 - Alignments
217 611
218 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam. 612 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam.
219 613
350 Arriba checks if the orientation of the structural variant matches that of a fusion detected in the RNA-Seq data. If, for example, Arriba predicts the 5' end of a gene to be retained in a fusion, then a structural variant is expected to confirm this, or else the variant is not considered to be related. 744 Arriba checks if the orientation of the structural variant matches that of a fusion detected in the RNA-Seq data. If, for example, Arriba predicts the 5' end of a gene to be retained in a fusion, then a structural variant is expected to confirm this, or else the variant is not considered to be related.
351 745
352 NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba. 746 NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba.
353 747
354 748
355 ** OUTPUTS_ ** 749 **OUTPUTS**
750
751 See: https://arriba.readthedocs.io/en/latest/output-files/
356 752
357 - fusions.tsv 753 - fusions.tsv
358 754
359 The file fusions.tsv (as specified by the parameter -o) contains fusions which pass all of Arriba's filters. It should be highly enriched for true predictions. The predictions are listed from highest to lowest confidence. The following paragraphs describe the columns in detail: 755 The file fusions.tsv (as specified by the parameter -o) contains fusions which pass all of Arriba's filters. It should be highly enriched for true predictions. The predictions are listed from highest to lowest confidence. The following paragraphs describe the columns in detail:
360 756
401 - fusions.discarded.tsv 797 - fusions.discarded.tsv
402 798
403 The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv. 799 The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv.
404 800
405 801
406 ** VISUALIZATION_ ** 802 **VISUALIZATION**
803
804 See: https://arriba.readthedocs.io/en/latest/visualization/
805
407 - fusions.pdf 806 - fusions.pdf
408 807
409 A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint. 808 A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint.
410 809
411 810
412 Code repository: https://github.com/suhrig/arriba 811 **OPTIONS**
413 Get help/report bugs: https://github.com/suhrig/arriba/issues 812
414 User manual: https://arriba.readthedocs.io/ 813 - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba
415 Please cite: https://doi.org/10.1101/gr.257246.119 814 - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr
815 - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/
416 816
417 817
418 .. _Arriba: https://arriba.readthedocs.io/en/latest/ 818 .. _Arriba: https://arriba.readthedocs.io/en/latest/
419 .. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/ 819 .. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/
420 .. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/ 820 .. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/
421 .. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/ 821 .. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/
822 .. _OPTIONS: https://arriba.readthedocs.io/en/latest/command-line-options/
422 823
423 ]]></help> 824 ]]></help>
424 <expand macro="citations" /> 825 <expand macro="citations" />
425 </tool> 826 </tool>