comparison star_fusion.xml @ 0:93704f98f56e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/star_fusion commit ec27c2abb7c8ba0bcbcb2f26cca9ef1109f7a3a2
author iuc
date Tue, 06 Sep 2016 04:55:21 -0400
parents
children 0b44456754e2
comparison
equal deleted inserted replaced
-1:000000000000 0:93704f98f56e
<tool id="star_fusion" name="STAR-Fusion" version="0.5.4-2" profile="16.07">
    <!--
        Galaxy wrapper for STAR-Fusion.
        The command first builds a temporary genome library with prep_genome_lib.pl
        (reference FASTA + gene model + blastn pairs) and then runs STAR-Fusion on
        either a STAR chimeric junction file or on raw FASTQ reads.
    -->
    <description>detect fusion genes in RNA-Seq data</description>
    <requirements>
        <!-- Bio-conda -->
        <requirement type="package" version="0.5.4">star-fusion</requirement>
    </requirements>

    <stdio>
        <!-- An explicit stdio block replaces Galaxy's default exit code handling,
             so non-zero exit codes have to be declared fatal here. -->
        <exit_code range="1:" level="fatal" description="Tool exited with a non-zero exit code"/>

        <!-- Messages on stderr that indicate a failed run. -->
        <regex match="command not found" source="stderr" level="fatal"/>
        <regex match="EXITING because of INPUT ERROR" source="stderr" level="fatal"/>
        <regex match="FATAL ERROR" source="stderr" level="fatal"/>

        <!-- Everything below is matched on stderr but only reported as a warning. -->
        <regex match="Warning:" source="stderr" level="warning"/>
        <regex match="CMD:" source="stderr" level="warning"/>

        <regex match="-done creating index file:" source="stderr" level="warning"/>
        <regex match="-parsing GTF file:" source="stderr" level="warning"/>
        <regex match="-building interval tree" source="stderr" level="warning"/>
        <regex match="-parsing fusion evidence:" source="stderr" level="warning"/>
        <regex match="-mapping reads to genes" source="stderr" level="warning"/>
        <regex match="-outputting fusion candidates to file:" source="stderr" level="warning"/>

        <regex match="Process complete" source="stderr" level="warning"/>
    </stdio>

    <!-- POSIX extended regular expressions have no lazy quantifier, hence a plain ".*". -->
    <version_command>STAR-Fusion --version 2>&amp;1 | grep version | grep -o -E "software version.*"</version_command>

    <command><![CDATA[
        ## 1. ensure the blastn file is handed to prep_genome_lib.pl gzip-compressed.
        ##    A plain-text input is compressed into the job working directory; nothing
        ##    is written next to the input dataset, which may be read-only and is
        ##    shared between jobs.
        if file --mime-type '${blast_pairs}' | grep -q /gzip\$; then
            blast_pairs_gz='${blast_pairs}' ;
        else
            ## Older versions of gzip do not support the -k option to keep
            ## the original file - streaming with -c is a universal solution

            gzip -1 -c -- '${blast_pairs}' > "\$(pwd)/blast_pairs.gz" &&
            blast_pairs_gz="\$(pwd)/blast_pairs.gz" ;
        fi &&

        ## 2. create reference index - using \$(pwd) is necessary, probably because the perl script changes work directory
        ##    - @todo once write a decent STAR and STAR Fusion data manager
        prep_genome_lib.pl
            --genome_fa '${fasta_type.ownFile}'
            --gtf '${geneModel}'
            --blast_pairs "\${blast_pairs_gz}"
            --CPU \${GALAXY_SLOTS:-1}
            --output_dir "\$(pwd)/tmp_star_fusion_genome_dir"
        &&

        ## 3. Run STAR-Fusion
        STAR-Fusion
            #if str($input_params.input_source) == "use_chimeric":
                --chimeric_junction '${input_params.chimeric_junction}'
            #else:
                --left_fq '${input_params.left_fq}'
                ## the second mate is optional
                #if $input_params.right_fq:
                    --right_fq '${input_params.right_fq}'
                #end if
            #end if

            --genome_lib_dir "\$(pwd)/tmp_star_fusion_genome_dir"

            ## advanced parameters are only passed when the user asked for the full list
            #if str($params.settingsType) == "full":
                --min_junction_reads $params.min_junction_reads
                --min_sum_frags $params.min_sum_frags
                --max_promiscuity $params.max_promiscuity
                --min_novel_junction_support $params.min_novel_junction_support
                --min_alt_pct_junction $params.min_alt_pct_junction
                --aggregate_novel_junction_dist $params.aggregate_novel_junction_dist
                -E $params.E
            #end if
    ]]></command>

    <inputs>
        <!-- Read input: either a chimeric junction file from an earlier STAR run, or FASTQ reads. -->
        <conditional name="input_params">
            <param name="input_source"
                   type="select"
                   label="Use output from earlier STAR run or let STAR Fusion control running STAR">
                <option value="use_chimeric">Use output from earlier STAR</option>
                <option value="use_fastq">Let STAR Fusion control running STAR</option>
            </param>
            <when value="use_chimeric">
                <param name="chimeric_junction"
                       type="data"
                       format="interval"
                       argument="--chimeric_junction"
                       label="Chimeric junction file from STAR (with STAR-Fusion settings)"/>
            </when>
            <when value="use_fastq">
                <param name="left_fq"
                       type="data"
                       format="fastqsanger"
                       argument="--left_fq"
                       label="left.fq file"/>
                <param name="right_fq"
                       type="data"
                       format="fastqsanger"
                       optional="true"
                       argument="--right_fq"
                       label="right.fq file (actually optional, but highly recommended)"/>
            </when>
        </conditional>

        <!-- Genome source. Both branches expose the reference as "ownFile". -->
        <conditional name="fasta_type">
            <param name="fasta_type_selector" type="select" label="Source for sequence to search">
                <option value="cached">Locally Cached sequences</option>
                <option value="history" selected="true">Sequences from your history</option>
            </param>
            <when value="cached">
                <param name="ownFile"
                       type="select" label="Genome to search">
                    <options from_data_table="all_fasta">
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="value" index="3"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile"
                       type="data"
                       format="fasta"
                       metadata_name="dbkey"
                       label="Select the reference genome (FASTA file)"/>
            </when>
        </conditional>

        <param name="geneModel"
               type="data"
               format="gff3,gtf"
               label="Gene model (gff3,gtf) file for splice junctions and fusion gene detection"/>
        <param name="blast_pairs"
               type="data"
               format="tabular"
               label="Result of BLAST+-blastn of the reference fasta sequence with itself"
               help="Run blastn with '-outfmt 6' or choose 'Tabular (standard 12 columns)' in the Galaxy wrapper. For human data it is recommended to obtain it from here: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"/>

        <conditional name="params">
            <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any STAR Fusion parameter.">
                <option value="default" selected="true">Use Defaults</option>
                <option value="full">Full parameter list</option>
            </param>
            <when value="default" />
            <when value="full"><!-- Full/advanced params. -->
                <param name="min_junction_reads"
                       type="integer" value="1"
                       label="minimum number of junction-spanning reads required."
                       argument="--min_junction_reads"/>
                <param name="min_sum_frags"
                       type="integer"
                       value="2"
                       label="minimum fusion support = (#junction_reads + #spanning_frags)"
                       argument="--min_sum_frags"/>
                <param name="max_promiscuity"
                       type="integer"
                       value="3"
                       label="maximum number of partners allowed for a given fusion"
                       argument="--max_promiscuity"/>
                <param name="min_novel_junction_support"
                       type="integer"
                       value="3"
                       label="minimum of 3 junction reads required if breakpoint lacks involvement of only reference junctions"
                       argument="--min_novel_junction_support"/>
                <param name="min_alt_pct_junction"
                       type="float"
                       value="10"
                       label="10% of the dominant isoform junction support"
                       argument="--min_alt_pct_junction"/>
                <param name="aggregate_novel_junction_dist"
                       type="integer"
                       value="5"
                       label="non-ref junctions within 5 are merged into single calls"
                       argument="--aggregate_novel_junction_dist"/>
                <param name="E"
                       type="float"
                       value="0.001"
                       label="E-value threshold for blast searches"
                       argument="-E"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- STAR-Fusion writes its result into the working directory; Galaxy collects it from there. -->
        <data format="tabular" name="output_final" label="${tool.name} on ${on_string}: fusion_candidates.final" from_work_dir="star-fusion.fusion_candidates.final"/>
    </outputs>

    <tests>
        <!-- Test 1: start from an existing STAR chimeric junction file. -->
        <test>
            <param name="input_source" value="use_chimeric" />
            <param name="chimeric_junction" ftype="interval" value="test1.tabular" />
            <param name="fasta_type_selector" value="history" />
            <param name="ownFile" ftype="fasta" value="test1.fa" />
            <param name="geneModel" ftype="gtf" value="test1.gtf" />
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
            <param name="settingsType" value="default" />

            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
                    <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: let STAR-Fusion run STAR itself on single-end FASTQ input. -->
        <test>
            <param name="input_source" value="use_fastq" />
            <param name="left_fq" ftype="fastqsanger" value="test1.fastqsanger"/>
            <param name="fasta_type_selector" value="history" />
            <param name="ownFile" ftype="fasta" value="test1.fa" />
            <param name="geneModel" ftype="gtf" value="test1.gtf" />
            <param name="blast_pairs" ftype="tabular" value="test1-test1.blastn.tabular" />
            <param name="settingsType" value="default" />

            <!-- Last column of the results contains data in a random order so exact matching is not feasible -->
            <output name="output_final">
                <assert_contents>
                    <has_line line="#fusion_name&#009;JunctionReads&#009;SpanningFrags&#009;Splice_type&#009;LeftGene&#009;LeftBreakpoint&#009;RightGene&#009;RightBreakpoint&#009;JunctionReads&#009;SpanningFrags" />
                    <has_text text="GENE1--GENE2&#009;24&#009;0&#009;INCL_NON_REF_SPLICE&#009;GENE1^GENE1&#009;chr1:240:+&#009;GENE2^GENE2&#009;chr2:241:+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set.

**Input: files required to run STAR-Fusion**

- A genome reference sequence (FASTA-format)
- A corresponding protein-coding gene annotation set (GTF/GFF Format)
- A blast-matching gene pairs file - in Galaxy you can create such files with the *ncbi_blast_plus* tool suite containing *blastn*: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
- A STAR chimeric/junction output file - this is optional as STAR Fusion can control running STAR as well.

The authors of STAR Fusion have made some of these files available at: https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/. The gene annotations in each case are restricted to the protein-coding and lincRNA transcripts.

More info: https://github.com/STAR-Fusion/STAR-Fusion/wiki

    </help>

    <citations>
        <citation type="bibtex">
            @unpublished{star_fusion,
                author = {Brian Haas and Nicolas Stransky and Daniel Nicorici},
                title = {STAR-Fusion},
                url = {https://github.com/STAR-Fusion/STAR-Fusion}
            }
        </citation>
    </citations>
</tool>