comparison rg_rnaStar.xml @ 0:b2326241bb09 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 13522d2ad2efbb8dab405723f491bd1a6591e3ef
author iuc
date Mon, 18 May 2015 13:34:13 -0400
parents
children bc685d13b637
comparison
equal deleted inserted replaced
-1:000000000000 0:b2326241bb09
1 <tool id="rna_star" name="rnastar" version="2.4.0d">
2 <description>Gapped-read mapper for RNA-seq data</description>
3 <requirements> <!-- External packages this wrapper depends on; the command section invokes STAR and samtools. -->
4 <requirement version="2.4.0d" type="package">rnastar</requirement>
5 <requirement version="0.1.19" type="package">samtools</requirement>
6 </requirements>
7 <stdio> <!-- The single rule below matches any stdout/stderr text and reports it at warning level. NOTE(review): no exit_code rule is declared here; confirm that failed runs are detected some other way. -->
8 <regex source="both" match=".*" level="warning" description="Some stderr/stdout text"/>
9 </stdio>
10
11 <command>
12 ## Thread counts come from GALAXY_SLOTS when the job runner sets it (fallbacks: 2 for indexing, 4 for mapping).
13 ## Run STAR. Steps are chained with &amp;&amp; so a failed step stops the job
14 ## rather than letting later steps run on missing or partial files.
15 #if str($refGenomeSource.genomeSource) == 'history':
16 mkdir -p tempstargenomedir &amp;&amp; STAR --runMode genomeGenerate --genomeDir "tempstargenomedir" --genomeFastaFiles "$refGenomeSource.ownFile" --runThreadN \${GALAXY_SLOTS:-2}
17 #if str($refGenomeSource.geneModel) != 'None':
18 --sjdbOverhang "100" --sjdbGTFfile "$refGenomeSource.geneModel"
19 #if str($refGenomeSource.geneModel.ext) == 'gff3':
20 --sjdbGTFtagExonParentTranscript Parent
21 #end if
22 #end if
23 &amp;&amp;
24 #end if
25 STAR
26 ## Can adjust this as appropriate for the system.
27 --genomeLoad NoSharedMemory
28 #if str($refGenomeSource.genomeSource) == 'history':
29 --genomeDir "tempstargenomedir"
30 #else
31 --genomeDir "$refGenomeSource.index.fields.path"
32 #end if
33 --readFilesIn "$singlePaired.input1"
34 #if str($singlePaired.sPaired) == "paired":
35 "$singlePaired.input2"
36 #end if
37 --runThreadN \${GALAXY_SLOTS:-4}
38 #if str($params.settingsType) == "full":
39 --chimSegmentMin $params.chim_segment_min
40 --chimScoreMin $params.chim_score_min
41 #end if
42
43 ## The three selects below carry complete STAR flags (or an empty string) as their values.
44 $outSAMstrandField $outFilterIntronMotifs $outSAMattributes
45
46 &amp;&amp;
47 ##
48 ## BAM conversion.
49 ##
50
51 ## Convert aligned reads.
52 samtools view -Shb Aligned.out.sam | samtools sort - AlignedSorted 2&gt;/dev/null
53
54 ## Convert chimeric reads. The value is cast explicitly so the test compares integers, not a parameter wrapper object.
55 #if str($params.settingsType) == "full" and int(str($params.chim_segment_min)) > 0:
56 &amp;&amp; samtools view -Shb Chimeric.out.sam | samtools sort - ChimericSorted 2&gt;/dev/null
57 #end if
58 </command>
59
60 <inputs>
61 <param type="text" name="jobName" value="rna-star run" size="120" label="Job narrative (added to output names)"
62 help="Only letters, numbers and underscores (_) will be retained in this field">
63 <sanitizer invalid_char=""> <!-- whitelist: letters, digits and underscore; invalid_char is the empty string -->
64 <valid initial="string.letters,string.digits"><add value="_"/></valid>
65 </sanitizer>
66 </param>
67 <!-- Read input: one dataset (input1) for single-end data, two (input1 and input2) for paired-end data. -->
68 <conditional name="singlePaired">
69 <param type="select" name="sPaired" label="Single ended or mate-pair ended reads in this library?">
70 <option selected="true" value="single">Single-end</option>
71 <option value="paired">Paired-end</option>
72 </param>
73 <when value="single">
74 <param type="data" name="input1" format="fastqsanger,fastq,fasta" label="RNA-Seq FASTQ file" help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
75 </when>
76 <when value="paired">
77 <param type="data" name="input1" format="fastqsanger,fastq,fasta" label="RNA-Seq FASTQ file, forward reads"
78 help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
79 <param type="data" name="input2" format="fastqsanger,fastq,fasta" label="RNA-Seq FASTQ file, reverse reads"
80 help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
81 </when>
82 </conditional>
83
84 <!-- Reference genome: an entry of the rnastar_index data table, or a FASTA (plus optional gene model) taken from the history. -->
85 <conditional name="refGenomeSource">
86 <param type="select" name="genomeSource" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
87 <option selected="True" value="indexed">Use a built-in index</option>
88 <option value="history">Index and use a genome fasta file from my current history</option>
89 </param>
90 <when value="indexed">
91 <param type="select" name="index" label="Select a reference genome">
92 <options from_data_table="rnastar_index">
93 <filter column="2" type="sort_by"/>
94 <validator message="No indexes are available for the selected input dataset" type="no_options"/>
95 </options>
96 </param>
97 </when>
98 <when value="history">
99 <param type="data" name="ownFile" format="fasta" metadata_name="dbkey" label="Select the reference genome"/>
100 <param type="data" name="geneModel" format="gff3,gtf" optional="true" label="Gene model (gff3,gtf) file for splice junctions. Leave blank for none"
101 help="Optional. If supplied, the index file will retain exon junction information for mapping splices"/>
102 </when>
103 </conditional>
104 <param type="select" name="outSAMattributes" label="Include extra sam attributes for downstream processing"> <!-- each option value is a complete STAR flag -->
105 <option value="--outSAMattributes Standard">Standard - eg for old Samtools downstream</option>
106 <option selected="true" value="--outSAMattributes All">All modern Samtools attributes - see below</option>
107 </param>
108 <param name="outSAMstrandField" type="select" label="Include strand field flag XS"> <!-- option value is a complete STAR flag, or empty to omit it; label names the XS flag so this select is distinguishable from outSAMattributes -->
109 <option value="--outSAMstrandField intronMotif" selected="true">Add XS for cufflinks</option>
110 <option value="">No XS added to sam output</option>
111 </param>
112 <param type="select" name="outFilterIntronMotifs" label="Canonical junction preparation for unstranded data"> <!-- option value is a complete STAR flag, or empty for no filtering flag -->
113 <option value="">No special handling - all non-canonical junctions passed through</option>
114 <option selected="true" value="--outFilterIntronMotifs RemoveNoncanonical">Remove all non-canonical junctions for eg cufflinks</option>
115 <option value="--outFilterIntronMotifs RemoveNoncanonicalUnannotated">Remove only unannotated non-canonical junctions for eg cufflinks</option>
116 </param>
117 <!-- Parameter settings: "preSet" exposes nothing extra, "full" exposes the two chimeric thresholds. -->
118 <conditional name="params">
119 <param type="select" name="settingsType" label="Settings to use" help="You can use the default settings or set custom values for any STAR parameter.">
120 <option selected="true" value="preSet">Use Defaults</option>
121 <option value="full">Full parameter list</option>
122 </param>
123 <when value="preSet"/>
124 <!-- Full/advanced parameters. -->
125 <when value="full">
126 <param type="integer" name="chim_segment_min" value="0" min="0" label="Minimum chimeric segment length"/>
127 <param type="integer" name="chim_score_min" value="0" min="0" label="Minimum total (summed) score of the chimeric segments"/>
128
129 </when>
130 </conditional>
131 </inputs>
132
133 <outputs>
134 <data name="output_log" format="txt" from_work_dir="Log.final.out" label="${jobName}.log"/> <!-- collected from Log.final.out in the job working directory -->
135 <data name="chimeric_junctions" format="interval" from_work_dir="Chimeric.out.junction" label="${jobName}_starchimjunc.bed">
136 <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter> <!-- output exists only for full settings with a positive minimum chimeric segment length -->
137 <actions> <!-- set dbkey from the chosen index row, or from the history FASTA -->
138 <conditional name="refGenomeSource.genomeSource">
139 <when value="indexed">
140 <action name="dbkey" type="metadata">
141 <option name="rnastar_index" type="from_data_table" column="1" offset="0">
142 <filter column="0" type="param_value" value="#" compare="startswith" keep="False"/>
143 <filter column="0" type="param_value" ref="refGenomeSource.index"/>
144 </option>
145 </action>
146 </when>
147 <when value="history">
148 <action name="dbkey" type="metadata">
149 <option name="refGenomeSource.ownFile" type="from_param" param_attribute="dbkey"/>
150 </action>
151 </when>
152 </conditional>
153 </actions>
154 </data>
155 <data name="chimeric_reads" format="bam" label="${jobName}_starmappedchim.bam"
156 from_work_dir="ChimericSorted.bam">
157 <filter>(params['settingsType'] == 'full' and params['chim_segment_min'] > 0)</filter> <!-- output exists only for full settings with a positive minimum chimeric segment length -->
158 <actions> <!-- set dbkey from the chosen index row, or from the history FASTA -->
159 <conditional name="refGenomeSource.genomeSource">
160 <when value="indexed">
161 <action name="dbkey" type="metadata">
162 <option name="rnastar_index" type="from_data_table" column="1" offset="0">
163 <filter column="0" type="param_value" value="#" compare="startswith" keep="False"/>
164 <filter column="0" type="param_value" ref="refGenomeSource.index"/>
165 </option>
166 </action>
167 </when>
168 <when value="history">
169 <action name="dbkey" type="metadata">
170 <option name="refGenomeSource.ownFile" type="from_param" param_attribute="dbkey"/>
171 </action>
172 </when>
173 </conditional>
174 </actions>
175 </data>
176 <data name="splice_junctions" format="interval" label="${jobName}_starsplicejunct.bed"
177 from_work_dir="SJ.out.tab">
178 <actions> <!-- set dbkey from the chosen index row, or from the history FASTA -->
179 <conditional name="refGenomeSource.genomeSource">
180 <when value="indexed">
181 <action name="dbkey" type="metadata">
182 <option name="rnastar_index" type="from_data_table" column="1" offset="0">
183 <filter column="0" type="param_value" value="#" compare="startswith" keep="False"/>
184 <filter column="0" type="param_value" ref="refGenomeSource.index"/>
185 </option>
186 </action>
187 </when>
188 <when value="history">
189 <action name="dbkey" type="metadata">
190 <option name="refGenomeSource.ownFile" type="from_param" param_attribute="dbkey"/>
191 </action>
192 </when>
193 </conditional>
194 </actions>
195 </data>
196 <data name="mapped_reads" format="bam" label="${jobName}_starmapped.bam"
197 from_work_dir="AlignedSorted.bam">
198 <actions> <!-- set dbkey from the chosen index row, or from the history FASTA -->
199 <conditional name="refGenomeSource.genomeSource">
200 <when value="indexed">
201 <action name="dbkey" type="metadata">
202 <option name="rnastar_index" type="from_data_table" column="1" offset="0">
203 <filter column="0" type="param_value" value="#" compare="startswith" keep="False"/>
204 <filter column="0" type="param_value" ref="refGenomeSource.index"/>
205 </option>
206 </action>
207 </when>
208 <when value="history">
209 <action name="dbkey" type="metadata">
210 <option name="refGenomeSource.ownFile" type="from_param" param_attribute="dbkey"/>
211 </action>
212 </when>
213 </conditional>
214 </actions>
215 </data>
216 </outputs>
217 <tests>
218 <test>
219 <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
220 <param name="jobName" value="rnastar_test"/>
221 <param name="genomeSource" value="history"/>
222 <param name="ownFile" value="tophat_test.fa"/>
223 <param name="sPaired" value="single"/>
224 <param name="outSAMattributes" value="--outSAMattributes All"/>
225 <param name="outSAMstrandField" value="--outSAMstrandField intronMotif"/>
226 <param name="outFilterIntronMotifs" value="--outFilterIntronMotifs RemoveNoncanonical"/>
227 <output name="output_log" file="rnastar_test.log" compare="diff" lines_diff="10"/>
228 <output name="splice_junctions" file="rnastar_test_splicejunctions.bed" compare="sim_size" delta="200"/>
229 <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="200"/>
230 </test>
231 </tests>
232 <help>
233
234 **What it does**
235 Runs the rna star gapped aligner. Suited to paired or single end rna-seq.
236
237 8.2: SAM alignments
238
239 The number of loci Nmap a read maps to (multi-mapping) is given by NH:i: field.
240 The mapping quality MAPQ (column 5) is 255 for uniquely mapping reads, and int(-10*log10(1-1/Nmap)) for
241 multi-mapping reads. This scheme is same as the one used by Tophat and is compatible with Cufflinks.
242
243 For multi-mappers, all alignments except one are marked with 0x100 (secondary alignment) in the FLAG
244 column 2. The un-marked alignment is either the best one (i.e. highest scoring), or is randomly selected from
245 the alignments of equal quality.
246
247 8.2.1: Standard SAM attributes
248 With default --outSAMattributes Standard option the following SAM attributes will be generated:
249
250 Column 12: NH: number of loci a read (pair) maps to
251 Column 13: IH: alignment index for all alignments of a read
252 Column 14: aS: alignment score
253 Column 15: nM: number of mismatches (does not include indels)
254
255 8.2.2: Extra SAM attributes
256 If --outSAMattributes All option is used, the following additional attributes will be output:
257
258 Column 16: jM:B:c,M1,M2,... Intron motifs for all junctions (i.e. N in CIGAR):
259 0: non-canonical; 1:GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT.
260
261 If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.
262 Column 17: jI:B:I,Start1,End1,Start2,End2,... Start and End of introns for all junctions (1-based)
263
264 Note that samtools 0.1.18 or later has to be used with these extra attributes.
265
266
267 8.2.3: XS SAM strand attribute for Cufflinks/Cuffdiff
268
269 If you have un-stranded RNA-seq data, and wish to run Cufflinks/Cuffdiff on STAR alignments, you will
270 need to run STAR with --outSAMstrandField intronMotif option, which will generate the XS
271 strand attribute for all alignments that contain splice junctions. The spliced alignments that have undefined
272 strand (i.e. containing only non-canonical junctions) will be suppressed.
273
274 If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
275 to run Cufflinks with the library option --library-type options. For example, cufflinks with
276 library-type fr-firststrand should be used for the "standard" dUTP protocol.
277
278 It is recommended to remove the non-canonical junctions for Cufflinks runs using one of the following options:
279
280
281 --outFilterIntronMotifs RemoveNoncanonical
282 filter out alignments that contain non-canonical junctions
283
284 OR
285
286 --outFilterIntronMotifs RemoveNoncanonicalUnannotated
287 filter out alignments that contain non-canonical unannotated junctions
288 when using annotated splice junctions database. The annotated non-
289 canonical junctions will be kept.
290
291
292 **Attributions**
293
294 Note that each component has its own license. Good luck with figuring out your obligations.
295
296 rna_star - see the web site at rna_star_
297
298 For details, please see the rna_starMS_
299 "STAR: ultrafast universal RNA-seq aligner"
300 A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
301
302 Galaxy_ (that's what you are using right now!) for gluing everything together
303
304 Most of the work for this wrapper XML is Jeremy Goecks' original STAR_ wrapper
305
306 Minor tweaks to output names to suit our downstream purposes, toolshed automated dependencies
307 and odds and ends of other code and documentation comprising this tool was
308 written by Ross Lazarus and that part is licensed_ the same way as other rgenetics artefacts
309
310 .. _STAR: https://bitbucket.org/jgoecks/jeremys-code/raw/fa1930a689b8e2f6b59cc1706e5ba0ed8ad357be/galaxy/tool-wrappers/star.xml
311 .. _licensed: http://creativecommons.org/licenses/by-nc-nd/3.0/
312 .. _rna_star: http://code.google.com/p/rna-star/
313 .. _rna_starMS: http://bioinformatics.oxfordjournals.org/content/29/1/15.full
314 .. _Galaxy: http://getgalaxy.org
315
316 </help>
317 <citations> <!-- a type="doi" entry holds the bare DOI only, with no "doi:" prefix -->
318 <citation type="doi">10.1093/bioinformatics/bts635</citation>
319 </citations>
320 </tool>
321