Mercurial > repos > jjohnson > arriba
comparison arriba.xml @ 11:8ed8af5836d1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit e0aa03add09ecc4ad5a5d41c439b8af9551fc53c"
author | jjohnson |
---|---|
date | Tue, 26 Apr 2022 20:21:29 +0000 |
parents | c58d1774c762 |
children | 73fd7703a743 |
comparison
equal
deleted
inserted
replaced
10:c58d1774c762 | 11:8ed8af5836d1 |
---|---|
1 <tool id="arriba" name="Arriba" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> | 1 <tool id="arriba" name="Arriba" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> |
2 <description>detect gene fusions from STAR aligned RNA-Seq data</description> | 2 <description>detect gene fusions from STAR aligned RNA-Seq data</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <xml name="fusion_actions"> | |
6 <actions> | |
7 <action name="comment_lines" type="metadata" default="1" /> | |
8 <action name="column_names" type="metadata" default="gene1,gene2,strand1(gene/fusion),strand2(gene/fusion),breakpoint1,breakpoint2,site1,site2,type,split_reads1,split_reads2,discordant_mates,coverage1,coverage2,confidence,reading_frame,tags,retained_protein_domains,closest_genomic_breakpoint1,closest_genomic_breakpoint2,gene_id1,gene_id2,transcript_id1,transcript_id2,direction1,direction2,filters,fusion_transcript,peptide_sequence,read_identifiers" /> | |
9 </actions> | |
10 </xml> | |
5 </macros> | 11 </macros> |
6 <expand macro="requirements" /> | 12 <expand macro="requirements" /> |
7 <expand macro="version_command" /> | 13 <expand macro="version_command" /> |
8 <command detect_errors="exit_code"><![CDATA[ | 14 <command detect_errors="exit_code"><![CDATA[ |
9 @GENOME_SOURCE@ | 15 @GENOME_SOURCE@ |
37 #if str($input_params.index.index_source) == "history" | 43 #if str($input_params.index.index_source) == "history" |
38 #set $star_index_dir = $input_params.index.star_index.extra_files_path | 44 #set $star_index_dir = $input_params.index.star_index.extra_files_path |
39 #else | 45 #else |
40 #set $star_index_dir = $input_params.index.arriba_ref.fields.star_index | 46 #set $star_index_dir = $input_params.index.arriba_ref.fields.star_index |
41 #end if | 47 #end if |
48 #if $blacklist | |
49 #if $blacklist.is_of_type('tabular.gz') | |
50 #set $blacklist_file = 'blacklist.tsv.gz' | |
51 ln -sf '$blacklist' $blacklist_file && | |
52 #else | |
53 #set $blacklist_file = $blacklist | |
54 #end if | |
55 #end if | |
56 #if $known_fusions | |
57 #if $known_fusions.is_of_type('tabular.gz') | |
58 #set $known_fusions_file = 'known_fusions.tsv.gz' | |
59 ln -sf '$known_fusions' $known_fusions_file && | |
60 #else | |
61 #set $known_fusions_file = $known_fusions | |
62 #end if | |
63 #end if | |
64 #if $tags | |
65 #if $tags.is_of_type('tabular.gz') | |
66 #set $tags_file = 'tags.tsv.gz' | |
67 ln -sf '$tags' $tags_file && | |
68 #else | |
69 #set $tags_file = $tags | |
70 #end if | |
71 #end if | |
72 | |
42 STAR | 73 STAR |
43 --runThreadN \${GALAXY_SLOTS:-1} | 74 --runThreadN \${GALAXY_SLOTS:-1} |
44 --genomeDir $star_index_dir | 75 --genomeDir $star_index_dir |
45 --genomeLoad NoSharedMemory | 76 --genomeLoad NoSharedMemory |
46 --readFilesIn $read1 $read2 | 77 --readFilesIn $read1 $read2 |
72 #end if | 103 #end if |
73 #end if | 104 #end if |
74 -a '$genome_assembly' | 105 -a '$genome_assembly' |
75 -g '$genome_annotation' | 106 -g '$genome_annotation' |
76 #if $blacklist | 107 #if $blacklist |
77 -b '$blacklist' | 108 -b '$blacklist_file' |
78 #else | 109 #else |
79 -f 'blacklist' | 110 -f 'blacklist' |
80 #end if | 111 #end if |
81 #if $protein_domains | 112 #if $protein_domains |
82 -p '$protein_domains' | 113 -p '$protein_domains' |
83 #end if | 114 #end if |
84 #if $known_fusions | 115 #if $known_fusions |
85 -k '$known_fusions' | 116 -k '$known_fusions_file' |
86 #end if | 117 #end if |
87 #if $tags | 118 #if $tags |
88 -t '$tags' | 119 -t '$tags_file' |
89 #end if | 120 #end if |
90 #if str($wgs.use_wgs) == "yes" | 121 #if str($wgs.use_wgs) == "yes" |
91 -d '$wgs.wgs' | 122 -d '$wgs.wgs' |
92 #if $wgs.max_genomic_breakpoint_distance | 123 #if $wgs.max_genomic_breakpoint_distance |
93 -D $wgs.max_genomic_breakpoint_distance | 124 -D $wgs.max_genomic_breakpoint_distance |
175 && samtools index Aligned.sortedByCoord.out.bam | 206 && samtools index Aligned.sortedByCoord.out.bam |
176 #elif str($visualization.do_viz) == "yes" | 207 #elif str($visualization.do_viz) == "yes" |
177 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam | 208 && samtools sort -@ \${GALAXY_SLOTS:-1} -m 4G -T tmp -O bam '$input_params.input' > Aligned.sortedByCoord.out.bam |
178 && samtools index Aligned.sortedByCoord.out.bam | 209 && samtools index Aligned.sortedByCoord.out.bam |
179 #end if | 210 #end if |
211 #if $output_fusions_vcf | |
212 && convert_fusions_to_vcf.sh '$genome_assembly' fusions.tsv fusions.vcf | |
213 #end if | |
214 #if $output_fusion_bams | |
215 && mkdir fusion_bams | |
216 && extract_fusion-supporting_alignments.sh fusions.tsv Aligned.sortedByCoord.out.bam 'fusion_bams/fusion' | |
217 #end if | |
180 #if str($visualization.do_viz) == "yes" | 218 #if str($visualization.do_viz) == "yes" |
181 #set $fusions = 'fusions.tsv' | 219 #set $fusions = 'fusions.tsv' |
182 && @DRAW_FUSIONS@ | 220 && @DRAW_FUSIONS@ |
183 #end if | 221 #end if |
184 ]]></command> | 222 ]]></command> |
185 <inputs> | 223 <inputs> |
186 <conditional name="input_params"> | 224 <conditional name="input_params"> |
187 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba running STAR"> | 225 <param name="input_source" type="select" label="Use output from earlier STAR run or let Arriba running STAR"> |
188 <option value="use_star">Use output from earlier STAR</option> | 226 <option value="use_star">Use output from earlier STAR</option> |
189 <option value="use_fastq">Let Arriba control running STAR</option> | 227 <option value="use_fastq">Let Arriba control running STAR</option> |
190 </param> | 228 </param> |
191 <when value="use_star"> | 229 <when value="use_star"> |
192 <param name="input" argument="-x" type="data" format="sam,bam,cram" label="STAR Aligned.out.sam"/> | 230 <param name="input" argument="-x" type="data" format="sam,bam,cram" label="STAR Aligned.out.sam"> |
231 <help><![CDATA[ recommended STAR options: --chimSegmentMin 10 --chimOutType WithinBAM ]]></help> | |
232 </param> | |
193 <param name="chimeric" argument="-c" type="data" format="sam,bam,cram" optional="true" label="STAR Chimeric.out.sam"> | 233 <param name="chimeric" argument="-c" type="data" format="sam,bam,cram" optional="true" label="STAR Chimeric.out.sam"> |
194 <help><![CDATA[ only required, if STAR was run with the parameter '--chimOutType SeparateSAMold' ]]></help> | 234 <help><![CDATA[ only required, if STAR was run with the parameter '--chimOutType SeparateSAMold' ]]></help> |
195 </param> | 235 </param> |
196 </when> | 236 </when> |
197 <when value="use_fastq"> | 237 <when value="use_fastq"> |
421 (denoted as '...'), fill the gaps using the assembly sequence wherever possible. | 461 (denoted as '...'), fill the gaps using the assembly sequence wherever possible. |
422 </help> | 462 </help> |
423 </param> | 463 </param> |
424 </section> | 464 </section> |
425 <param name="output_fusions_discarded" argument="-O" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/> | 465 <param name="output_fusions_discarded" argument="-O" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.discarded.tsv"/> |
466 <param name="output_fusions_vcf" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Output fusions.vcf"/> | |
467 <param name="output_fusion_bams" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Output fusion BAMs"/> | |
426 <conditional name="visualization"> | 468 <conditional name="visualization"> |
427 <param name="do_viz" type="select" label="Generate visualization"> | 469 <param name="do_viz" type="select" label="Generate visualization"> |
428 <option value="yes">Yes</option> | 470 <option value="yes">Yes</option> |
429 <option value="no">no</option> | 471 <option value="no">no</option> |
430 </param> | 472 </param> |
431 <when value="yes"> | 473 <when value="yes"> |
432 <expand macro="visualization_options" /> | 474 <expand macro="visualization_options" /> |
433 </when> | 475 </when> |
434 <when value="no"/> | 476 <when value="no"/> |
435 </conditional> | 477 </conditional> |
436 | |
437 </inputs> | 478 </inputs> |
438 <outputs> | 479 <outputs> |
439 <data name="fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/> | 480 <data name="fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"> |
481 <expand macro="fusion_actions" /> | |
482 </data> | |
483 | |
440 <data name="discarded_fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv"> | 484 <data name="discarded_fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv"> |
441 <filter> output_fusions_discarded == True</filter> | 485 <filter> output_fusions_discarded == True</filter> |
486 <expand macro="fusion_actions" /> | |
442 </data> | 487 </data> |
488 <data name="fusions_vcf" format="vcf" label="${tool.name} on ${on_string}: fusions.vcf" from_work_dir="fusions.vcf"> | |
489 <filter> output_fusions_vcf == True</filter> | |
490 </data> | |
491 <collection name="fusion_bams" type="list" label="${tool.name} on ${on_string}: Fusion Alignments"> | |
492 <discover_datasets pattern="(?P<name>fusion_\d+\.bam)$" format="bam" directory="fusion_bams" visible="false"/> | |
493 <filter>output_fusion_bams == True</filter> | |
494 </collection> | |
443 <data name="aligned_bam" format="bam" label="${tool.name} on ${on_string}: Aligned.bam" from_work_dir="Aligned.sortedByCoord.out.bam"> | 495 <data name="aligned_bam" format="bam" label="${tool.name} on ${on_string}: Aligned.bam" from_work_dir="Aligned.sortedByCoord.out.bam"> |
444 <filter>input_params['input_source'] == "use_fastq"</filter> | 496 <filter>input_params['input_source'] == "use_fastq"</filter> |
445 </data> | 497 </data> |
446 <data name="fusions_pdf" format="pdf" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf"> | 498 <data name="fusions_pdf" format="pdf" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf"> |
447 <filter>visualization['do_viz'] == "yes"</filter> | 499 <filter>visualization['do_viz'] == "yes"</filter> |
469 <has_text_matching expression="BCR\tABL1"/> | 521 <has_text_matching expression="BCR\tABL1"/> |
470 </assert_contents> | 522 </assert_contents> |
471 </output> | 523 </output> |
472 </test> | 524 </test> |
473 <!-- Test 2 - From existing BAM with protein_domains and visualization --> | 525 <!-- Test 2 - From existing BAM with protein_domains and visualization --> |
474 | |
475 <test> | 526 <test> |
476 <conditional name="input_params"> | 527 <conditional name="input_params"> |
477 <param name="input_source" value="use_star"/> | 528 <param name="input_source" value="use_star"/> |
478 <param name="input" ftype="sam" value="Aligned.out.sam"/> | 529 <param name="input" ftype="sam" value="Aligned.out.sam"/> |
479 </conditional> | 530 </conditional> |
535 | 586 |
536 - Alignments | 587 - Alignments |
537 | 588 |
538 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam. | 589 Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam. |
539 | 590 |
591 Recommended parameter value when creating the STAR index: | |
592 | |
593 * --sjdbOverhang 250 | |
594 | |
595 | |
596 STAR recommended parameter values :: | |
597 | |
598 * --outSAMunmapped Within | |
599 * --outFilterMultimapNmax 50 | |
600 * --peOverlapNbasesMin 10 | |
601 * --alignSplicedMateMapLminOverLmate 0.5 | |
602 * --alignSJstitchMismatchNmax 5 -1 5 5 | |
603 * --chimSegmentMin 10 | |
604 * --chimOutType WithinBAM HardClip | |
605 * --chimJunctionOverhangMin 10 | |
606 * --chimScoreDropMax 30 | |
607 * --chimScoreJunctionNonGTAG 0 | |
608 * --chimScoreSeparation 1 | |
609 * --chimSegmentReadGapMax 3 | |
610 * --chimMultimapNmax 50 | |
611 | |
612 | |
540 Arriba extracts three types of reads from the alignment file(s): | 613 Arriba extracts three types of reads from the alignment file(s): |
541 | 614 |
542 * Split-reads, i.e., reads composed of segments which map in a non-linear way. STAR stores such reads as supplementary alignments. | 615 * Split-reads, i.e., reads composed of segments which map in a non-linear way. STAR stores such reads as supplementary alignments. |
543 * Discordant mates, i.e., paired-end reads which originate from the same fragment but which align in a non-linear way. | 616 * Discordant mates, i.e., paired-end reads which originate from the same fragment but which align in a non-linear way. |
544 * Alignments which cross the boundaries of annotated genes, because these alignments might arise from focal deletions. In RNA-Seq data deletions of up to several hundred kb are hard to distinguish from splicing. They are represented identically as gapped alignments, because the sizes of many introns are in fact of this order of magnitude. STAR applies a rather arbitrary measure to decide whether a gapped alignment arises from splicing or from a genomic deletion: The parameter --alignIntronMax determines what gap size is still assumed to be a splicing event and introns are used to represent these gaps. Only gaps larger than this limit are classified as potential evidence for genomic deletions and are stored as chimeric alignments. Most STAR-based fusion detection tools only consider chimeric alignments as evidence for gene fusions and are blind to focal deletions, hence. As a workaround, these tools recommend reducing the value of the parameter --alignIntronMax. But this impairs the quality of alignment, because it reduces the scope that STAR searches to find a spliced alignment. To avoid compromising the quality of alignment for the sake of fusion detection, the only solution would be to run STAR twice - once with settings optimized for regular alignment and once for fusion detection. This would double the runtime. In contrast, Arriba does not require to reduce the maximum intron size. It employs a more sensible criterion to distinguish splicing from deletions: Arriba considers all those reads as potential evidence for deletions that span the boundary of annotated genes. | 617 * Alignments which cross the boundaries of annotated genes, because these alignments might arise from focal deletions. In RNA-Seq data deletions of up to several hundred kb are hard to distinguish from splicing. They are represented identically as gapped alignments, because the sizes of many introns are in fact of this order of magnitude. 
STAR applies a rather arbitrary measure to decide whether a gapped alignment arises from splicing or from a genomic deletion: The parameter --alignIntronMax determines what gap size is still assumed to be a splicing event and introns are used to represent these gaps. Only gaps larger than this limit are classified as potential evidence for genomic deletions and are stored as chimeric alignments. Most STAR-based fusion detection tools only consider chimeric alignments as evidence for gene fusions and are blind to focal deletions, hence. As a workaround, these tools recommend reducing the value of the parameter --alignIntronMax. But this impairs the quality of alignment, because it reduces the scope that STAR searches to find a spliced alignment. To avoid compromising the quality of alignment for the sake of fusion detection, the only solution would be to run STAR twice - once with settings optimized for regular alignment and once for fusion detection. This would double the runtime. In contrast, Arriba does not require to reduce the maximum intron size. It employs a more sensible criterion to distinguish splicing from deletions: Arriba considers all those reads as potential evidence for deletions that span the boundary of annotated genes. |