Mercurial > repos > bgruening > salmon
comparison old/salmon.xml @ 11:666bb48b1007 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 02087ce2966cf8b4aac9197a41171e7f986c11d1-dirty"
author | bgruening |
---|---|
date | Wed, 02 Oct 2019 04:31:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10:4de6e2e40c7a | 11:666bb48b1007 |
---|---|
1 <tool id="salmon" name="Salmon" version="@VERSION@"> | |
2 | |
3 <description>Transcript Quantification from RNA-seq data</description> | |
4 | |
5 <macros> | |
6 <xml name="strandedness"> | |
7 <param name="strandedness" type="select" label="Specify the strandedness of the reads"> | |
8 <option value="U" selected="True">Not stranded (U)</option> | |
9 <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option> | |
10 <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option> | |
11 </param> | |
12 </xml> | |
13 <xml name="orientation"> | |
14 <param name="orientation" type="select" label="Relative orientation of reads within a pair"> | |
15 <option value="M">Mates are oriented in the same direction (M = matching)</option> | |
16 <option value="O">Mates are oriented away from each other (O = outward)</option> | |
17 <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option> | |
18 </param> | |
19 </xml> | |
20 <token name="@VERSION@">0.11.2</token> | |
21 <token name="@IDX_VERSION@">q5</token> | |
22 </macros> | |
23 | |
24 <requirements> | |
25 <requirement type="package" version="1.0.6">bzip2</requirement> | |
26 <requirement type="package" version="@VERSION@">salmon</requirement> | |
27 <requirement type="package" version="1.2">seqtk</requirement> | |
28 </requirements> | |
29 | |
30 <stdio> | |
31 <exit_code range="1:" /> | |
32 <exit_code range=":-1" /> | |
33 <regex match="Error:" /> | |
34 <regex match="Exception:" /> | |
35 <regex match="Exception :" /> | |
36 </stdio> | |
37 <version_command>salmon --version</version_command> | |
38 <command><![CDATA[ | |
39 mkdir ./index | |
40 && | |
41 mkdir ./output | |
42 #if $refTranscriptSource.TranscriptSource == "history": | |
43 && | |
44 salmon index | |
45 --transcripts $refTranscriptSource.ownFile | |
46 --kmerLen $refTranscriptSource.kmerLen | |
47 --threads "\${GALAXY_SLOTS:-4}" | |
48 --index './index' | |
49 --type '$quasi_orphans.type' | |
50 $perfectHash | |
51 #set $index_path = './index' | |
52 #else: | |
53 #set $index_path = $refTranscriptSource.index.fields.path | |
54 #end if | |
55 && | |
56 #set compressed = 'no' | |
57 #if $single_or_paired.single_or_paired_opts == 'single': | |
58 #if $single_or_paired.input_singles.ext == 'fasta': | |
59 #set $ext = 'fasta' | |
60 #else: | |
61 #if $single_or_paired.input_singles.is_of_type("fastq.gz", "fastqsanger.gz"): | |
62 #set compressed = 'GZ' | |
63 #else if $single_or_paired.input_singles.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
64 #set compressed = 'BZ2' | |
65 #end if | |
66 #set $ext = 'fastq' | |
67 #end if | |
68 ln -s $single_or_paired.input_singles ./single.$ext && | |
69 #else if $single_or_paired.single_or_paired_opts == 'paired': | |
70 #if $single_or_paired.input_mate1.ext == 'fasta': | |
71 #set $ext = 'fasta' | |
72 #else: | |
73 #if $single_or_paired.input_mate1.is_of_type("fastq.gz", "fastqsanger.gz"): | |
74 #set compressed = 'GZ' | |
75 #else if $single_or_paired.input_mate1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
76 #set compressed = 'BZ2' | |
77 #end if | |
78 #set $ext = 'fastq' | |
79 #end if | |
80 ln -s $single_or_paired.input_mate1 ./mate1.$ext && | |
81 ln -s $single_or_paired.input_mate2 ./mate2.$ext && | |
82 #else if $single_or_paired.single_or_paired_opts == 'paired_collection': | |
83 #if $single_or_paired.input_1.forward.ext == 'fasta': | |
84 #set $ext = 'fasta' | |
85 #else: | |
86 #if $single_or_paired.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): | |
87 #set compressed = 'GZ' | |
88 #else if $single_or_paired.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
89 #set compressed = 'BZ2' | |
90 #end if | |
91 #set $ext = 'fastq' | |
92 #end if | |
93 ln -s ${single_or_paired.input_1.forward} ./mate1.$ext && | |
94 ln -s ${single_or_paired.input_1.reverse} ./mate2.$ext && | |
95 #else if $single_or_paired.single_or_paired_opts == 'paired_interleaved': | |
96 #if $single_or_paired.input_1.ext == 'fasta': | |
97 #set $ext = 'fasta' | |
98 #else: | |
99 #if $single_or_paired.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): | |
100 #set compressed = 'GZ' | |
101 #else if $single_or_paired.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
102 #set compressed = 'BZ2' | |
103 #end if | |
104 #set $ext = 'fastq' | |
105 #end if | |
106 ln -s $single_or_paired.input_1 ./mate1.$ext && | |
107 #end if | |
108 #if $geneMap: | |
109 ln -s "$geneMap" ./geneMap.${geneMap.ext} && | |
110 #end if | |
111 salmon quant | |
112 --index $index_path | |
113 #if $single_or_paired.single_or_paired_opts == 'single': | |
114 --libType ${single_or_paired.strandedness} | |
115 #if $compressed == 'GZ': | |
116 --unmatedReads <(zcat < ./single.$ext) | |
117 #else if $compressed == 'BZ2': | |
118 --unmatedReads <(bzcat < ./single.$ext) | |
119 #else: | |
120 --unmatedReads ./single.$ext | |
121 #end if | |
122 #else: | |
123 --libType "${single_or_paired.orientation}${single_or_paired.strandedness}" | |
124 #if $single_or_paired.single_or_paired_opts == 'paired_interleaved': | |
125 #if $compressed == 'BZ2': | |
126 --mates1 <(bzcat < ./mate1.$ext | seqtk seq -1) | |
127 --mates2 <(bzcat < ./mate1.$ext | seqtk seq -2) | |
128 #else: | |
129 --mates1 <(seqtk seq -1 ./mate1.$ext) | |
130 --mates2 <(seqtk seq -2 ./mate1.$ext) | |
131 #end if | |
132 #else: | |
133 #if $compressed == 'GZ': | |
134 --mates1 <(zcat < ./mate1.$ext) | |
135 --mates2 <(zcat < ./mate2.$ext) | |
136 #else if $compressed == 'BZ2': | |
137 --mates1 <(bzcat < ./mate1.$ext) | |
138 --mates2 <(bzcat < ./mate2.$ext) | |
139 #else: | |
140 --mates1 ./mate1.$ext | |
141 --mates2 ./mate2.$ext | |
142 #end if | |
143 #end if | |
144 #end if | |
145 --output ./output | |
146 #if str($quasi_orphans.type) == 'quasi': | |
147 --allowOrphans | |
148 $quasi_orphans.validateMappings | |
149 --ma $quasi_orphans.matchScore | |
150 --mp $quasi_orphans.mismatchPenalty | |
151 --go $quasi_orphans.gapOpenPenalty | |
152 --ge $quasi_orphans.gapExtensionPenalty | |
153 --minScoreFraction $quasi_orphans.minScoreFraction | |
154 #end if | |
155 $seqBias | |
156 $gcBias | |
157 $noErrorModel | |
158 --threads "\${GALAXY_SLOTS:-4}" | |
159 --incompatPrior $adv.incompatPrior | |
160 $adv.consistentHits | |
161 $adv.dumpEq | |
162 $adv.reduceGCMemory | |
163 #if str($adv.biasSpeedSamp): | |
164 --biasSpeedSamp $adv.biasSpeedSamp | |
165 #end if | |
166 $adv.strictIntersect | |
167 #if str($adv.fldMax): | |
168 --fldMax $adv.fldMax | |
169 #end if | |
170 #if str($adv.fldMean): | |
171 --fldMean $adv.fldMean | |
172 #end if | |
173 #if str($adv.fldSD): | |
174 --fldSD $adv.fldSD | |
175 #end if | |
176 #if $adv.forgettingFactor: | |
177 --forgettingFactor $adv.forgettingFactor | |
178 #end if | |
179 $adv.initUniform | |
180 $adv.noFragLengthDist | |
181 $adv.noBiasLengthThreshold | |
182 #if str($adv.maxReadOcc): | |
183 --maxReadOcc $adv.maxReadOcc | |
184 #end if | |
185 #if $geneMap: | |
186 --geneMap ./geneMap.${geneMap.ext} | |
187 #end if | |
188 $adv.noEffectiveLengthCorrection | |
189 $adv.useEM | |
190 #if str($adv.numBiasSamples): | |
191 --numBiasSamples $adv.numBiasSamples | |
192 #end if | |
193 #if str($adv.numAuxModelSamples): | |
194 --numAuxModelSamples $adv.numAuxModelSamples | |
195 #end if | |
196 #if str($adv.numPreAuxModelSamples): | |
197 --numPreAuxModelSamples $adv.numPreAuxModelSamples | |
198 #end if | |
199 #if str($adv.numGibbsSamples): | |
200 --numGibbsSamples $adv.numGibbsSamples | |
201 #end if | |
202 #if str($adv.numBootstraps): | |
203 --numBootstraps $adv.numBootstraps | |
204 #end if | |
205 #if str($adv.consensusSlack): | |
206 --consensusSlack $adv.consensusSlack | |
207 #else: | |
208 #if $quasi_orphans.validateMappings: | |
209 --consensusSlack 1 | |
210 #else: | |
211 --consensusSlack 0 | |
212 #end if | |
213 #end if | |
214 $adv.perTranscriptPrior | |
215 #if $adv.vbPrior: | |
216 --vbPrior $adv.vbPrior | |
217 #end if | |
218 $adv.writeUnmappedNames | |
219 --sigDigits $adv.sigDigits | |
220 #if str($adv.writeMappings): | |
221 $adv.writeMappings > ${output_sam} | |
222 #end if | |
223 ]]> | |
224 </command> | |
225 | |
226 <inputs> | |
227 <conditional name="refTranscriptSource"> | |
228 <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options"> | |
229 <option value="indexed">Use a built-in index</option> | |
230 <option value="history" selected="True">Use one from the history</option> | |
231 </param> | |
232 <when value="indexed"> | |
233 <param name="index" type="select" label="Select a reference transcriptome" help="If your transcriptome of interest is not listed, contact your Galaxy admin"> | |
234 <options from_data_table="salmon_indexes_versioned"> | |
235 <filter type="sort_by" column="2"/> | |
236 <filter type="static_value" column="4" value="@IDX_VERSION@" /> | |
237 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
238 </options> | |
239 </param> | |
240 </when> <!-- build-in --> | |
241 <when value="history"> | |
242 <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" /> | |
243 <param argument="kmerLen" type="integer" value="31" label="The size should be odd number."/> | |
244 </when> <!-- history --> | |
245 </conditional> | |
246 <conditional name="single_or_paired"> | |
247 <param name="single_or_paired_opts" type="select" label="Is this library mate-paired?"> | |
248 <option value="single">Single-end</option> | |
249 <option value="paired">Paired-end</option> | |
250 <option value="paired_collection">Paired-end Dataset Collection</option> | |
251 <option value="paired_interleaved">Paired-end data from single interleaved dataset</option> | |
252 </param> | |
253 <when value="single"> | |
254 <param name="input_singles" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="FASTQ/FASTA file" help="FASTQ or FASTA file; FASTQ may be gzip- or bzip2-compressed." /> | |
255 <expand macro="strandedness" /> | |
256 </when> | |
257 <when value="paired"> | |
258 <param name="input_mate1" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 1" help="FASTQ file." /> | |
259 <param name="input_mate2" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 2" help="FASTQ file." /> | |
260 <expand macro="orientation" /> | |
261 <expand macro="strandedness" /> | |
262 </when> | |
263 <when value="paired_collection"> | |
264 <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" /> | |
265 <expand macro="orientation" /> | |
266 <expand macro="strandedness" /> | |
267 </when> | |
268 <when value="paired_interleaved"> | |
269 <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/> | |
270 <expand macro="orientation" /> | |
271 <expand macro="strandedness" /> | |
272 </when> | |
273 </conditional> | |
274 <conditional name="quasi_orphans"> | |
275 <param argument="--type" type="select" label="Type of index" help="When using quasi, orphaned reads will be considered when performing lightweight-alignment."> | |
276 <option value="quasi" selected="True">quasi</option> | |
277 </param> | |
278 <when value="quasi"> | |
279 <param argument="--validateMappings" type="boolean" truevalue="--validateMappings" falsevalue="" checked="False" | |
280 label="Validate mappings" | |
281 help="Validate mappings using alignment-based verification. If this flag is passed, quasi-mappings will be validated to ensure that they could give rise to a reasonable alignment before they are further used for quantification."/> | |
282 <param name="matchScore" argument="--ma" type="integer" value="2" | |
283 label="Match Score" | |
284 help="The value given to a match between read and reference nucleotides in an alignment."/> | |
285 <param name="mismatchPenalty" argument="--mp" type="integer" value="4" | |
286 label="Mismatch Penalty" | |
287 help="The value given to a mis-match between read and reference nucleotides in an alignment. This will be cast to a negative value."/> | |
288 <param name="gapOpenPenalty" argument="--go" type="integer" value="5" | |
289 label="Gap Open Penalty" | |
290 help="The value given to a gap opening in an alignment."/> | |
291 <param name="gapExtensionPenalty" argument="--ge" type="integer" value="3" | |
292 label="Gap Extension Penalty" | |
293 help="The value given to a gap extension in an alignment."/> | |
294 <param argument="--minScoreFraction" type="float" value="0.65" min="0.0" max="0.99" | |
295 label="Min Score Fraction" | |
296 help="The fraction of the optimal possible alignment score that a mapping must achieve in order to be considered valid. Should be in (0,1]."/> | |
297 </when> <!-- build-in --> | |
298 </conditional> | |
299 <param argument="--perfectHash" type="boolean" truevalue="--perfectHash" falsevalue="" checked="False" | |
300 label="Perfect Hash" | |
301 help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), but will take longer to construct "/> | |
302 <param argument="--seqBias" type="boolean" truevalue="--seqBias" falsevalue="" checked="False" | |
303 label="Perform sequence-specific bias correction" | |
304 help=""/> | |
305 <param argument="--gcBias" type="boolean" truevalue="--gcBias" falsevalue="" checked="False" | |
306 label="Perform fragment GC bias correction" | |
307 help=""/> | |
308 <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True" | |
309 label="File containing a mapping of transcripts to genes" | |
310 help="If this file is provided Salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab." /> | |
311 <param argument="--noErrorModel" type="boolean" truevalue="--noErrorModel" falsevalue="" checked="False" | |
312 label="No Error Model" | |
313 help="Turn off the alignment error model, which takes into account the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy."/> | |
314 <section name="adv" title="Additional Options"> | |
315 <param argument="--writeMappings" type="boolean" truevalue="--writeMappings" falsevalue="" checked="False" | |
316 label="Write Mappings" | |
317 help="If this option is set to 'Yes', then the quasi-mapping results will be written out in SAM-compatible format. By default, output is directed to stdout." /> | |
318 <param argument="--incompatPrior" type="float" value="9.9999999999999995e-21" | |
319 label="Incompatible Prior" | |
320 help="This option sets the prior probability that an alignment that disagrees with the specified library type (--libType) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be 'impossible', while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do" /> | |
321 <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False" | |
322 label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/> | |
323 <param argument="--reduceGCMemory" type="boolean" truevalue="--reduceGCMemory" falsevalue="" optional="True" checked="False" | |
324 label="If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content." | |
325 help="Enabling this will reduce memory usage, but can also reduce speed. However, the results themselves will remain the same."/> | |
326 <param argument="--biasSpeedSamp" type="integer" value="1" optional="True" | |
327 label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/> | |
328 <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False" | |
329 label="Modifies how orphans are assigned." help="When this flag is set, if the intersection of the quasi-mappings for the left and right is empty, then all mappings for the left and all mappings for the right read are reported as orphaned quasi-mappings."/> | |
330 <param argument="--minLen" type="integer" value="19" optional="True" | |
331 label=" (S)MEMs smaller than this size won't be considered." help="" /> | |
332 <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="False" | |
333 label="Perform sensitive quantification" | |
334 help=" Setting this option enables the splitting of SMEMs that are larger than 1.5 times the minimum seed length (minLen/k above). This may reveal high scoring chains of MEMs that are masked by long SMEMs. However, this option makes lightweight-alignment a bit slower and is usually not necessary if the reference is of reasonable quality." /> | |
335 <param argument="--consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False" | |
336 label="Force hits gathered during quasi-mapping to be consistent" | |
337 help="" /> | |
338 <param argument="--extraSensitive" type="boolean" truevalue="--extraSensitive" falsevalue="" checked="False" | |
339 label="Perform extra sensitive quantification" | |
340 help="Setting this option enables an extra pass of 'seed' search. Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will typically slow down quantification by ~40%. Consider enabling this option if you find the mapping rate to be significantly lower than expected."/> | |
341 <param argument="--coverage" type="float" value="0.69999999999999996" optional="True" | |
342 label="Required coverage of read by union of SMEMs to consider it a hit" | |
343 help="" /> | |
344 <param argument="--fldMax" type="integer" value="1000" optional="True" | |
345 label="The maximum fragment length to consider when building the empirical distribution." | |
346 help=""/> | |
347 <param argument="--fldMean" type="integer" value="200" optional="True" | |
348 label="The mean used in the fragment length distribution prior" | |
349 help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/> | |
350 <param argument="--fldSD" type="integer" value="80" optional="True" | |
351 label="Standard deviation" | |
352 help="The standard deviation used in the fragment length distribution prior."/> | |
353 <param argument="--forgettingFactor" type="float" value="0.65000000000000002" optional="True" | |
354 label="The forgetting factor used in the online learning schedule." | |
355 help=" A smaller value results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0]." /> | |
356 <param argument="--initUniform" type="boolean" truevalue="--initUniform" falsevalue="" checked="False" | |
357 label="Initialization with uniform parameters" | |
358 help="initialize the offline inference with uniform parameters, rather than seeding with online parameters." /> | |
359 <param argument="--maxReadOcc" type="integer" value="100" optional="True" | |
360 label="Maximal read mapping occurrence" | |
361 help="Reads mapping to more than this many places won't be considered."/> | |
362 <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False" | |
363 label="Disable effective length correction" | |
364 help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/> | |
365 <param argument="--noFragLengthDist" type="boolean" truevalue="--noFragLengthDist" falsevalue="" checked="False" | |
366 label="Ignore fragment length distribution" | |
367 help="[experimental] : Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability." /> | |
368 <param argument="--noBiasLengthThreshold" type="boolean" truevalue="--noBiasLengthThreshold" falsevalue="" checked="False" | |
369 label="[experimental] : If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths." | |
370 help="This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold." /> | |
371 <param argument="--numBiasSamples" type="integer" value="2000000" optional="True" | |
372 label="Number of fragment mappings to use when learning the sequence-specific bias model." | |
373 help="" /> | |
374 <param argument="--numAuxModelSamples" type="integer" value="5000000" optional="True" | |
375 label="The first numAuxModelSamples are used to train the auxiliary model parameters." | |
376 help="(e.g. fragment length distribution, bias, etc.). After the first numAuxModelSamples observations the auxiliary model parameters will be assumed to have converged and will be fixed." /> | |
377 <param argument="--numPreAuxModelSamples" type="integer" value="1000000" optional="True" | |
378 label="The first numPreAuxModelSamples will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models." | |
379 help=" The purpose of ignoring the auxiliary models for the first numPreAuxModelSamples observations is to avoid applying these models before their parameters have been learned sufficiently well." /> | |
380 <param argument="--splitWidth" type="integer" value="0" optional="True" | |
381 label=" If (S)MEM occurs fewer than this many times, search for smaller, contained MEMs" | |
382 help="The default value will not split (S)MEMs, a higher value will result in more MEMs being explored and, thus, will result in increased running time." /> | |
383 <param argument="--splitSpanningSeeds" type="boolean" truevalue="--splitSpanningSeeds" falsevalue="" checked="False" | |
384 label="Attempt to split seeds that happen to fall on the boundary between two transcripts." | |
385 help="This can improve the fragment hit-rate, but is usually not necessary."/> | |
386 <param argument="--useEM" type="boolean" truevalue="--useEM" falsevalue="" checked="False" | |
387 label="Use the traditional EM algorithm for optimization in the batch passes." | |
388 help=""/> | |
389 <param argument="--numGibbsSamples" type="integer" value="0" optional="True" | |
390 label=" Number of Gibbs sampling rounds to perform." | |
391 help="" /> | |
392 <param argument="--numBootstraps" type="integer" value="0" optional="True" | |
393 label="Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling." | |
394 help="" /> | |
395 <param argument="--perTranscriptPrior" type="boolean" truevalue="--perTranscriptPrior" falsevalue="" checked="False" | |
396 label="The prior will be interpreted as a transcript-level prior." | |
397 help="either the default or the argument provided via --vbPrior" /> | |
398 <param argument="--vbPrior" type="float" value="0.001" optional="True" | |
399 label="The prior that will be used in the VBEM algorithm." | |
400 help="This is interpreted as a per-nucleotide prior, unless the --perTranscriptPrior flag is also given, in which case this is used as a transcript-level prior." /> | |
401 <param argument="--writeUnmappedNames" type="boolean" truevalue="--writeUnmappedNames" falsevalue="" checked="False" | |
402 label="Write the names of un-mapped reads to the file unmapped_names.txt." | |
403 help=""/> | |
404 <param argument="--sigDigits" type="integer" value="3" | |
405 label="Significant Digits" | |
406 help="The number of significant digits to write when outputting the EffectiveLength and NumReads columns."/> | |
407 <param argument="--consensusSlack" type="integer" optional="True" | |
408 label="Consensus Slack" | |
409 help="The amount of slack allowed in the quasi-mapping consensus mechanism. Normally, a transcript must cover all hits to be considered for mapping. If this is set to a value, X, greater than 0, then a transcript can fail to cover up to X hits before it is discounted as a mapping candidate. The default value of this option is 1 if --validateMappings is given and 0 otherwise."/> | |
410 </section> | |
411 </inputs> | |
412 | |
413 <outputs> | |
414 <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> | |
415 <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)"> | |
416 <filter>geneMap</filter> | |
417 </data> | |
418 <data name="output_sam" format="sam" label="${tool.name} on ${on_string} (SAM format)"> | |
419 <filter>adv['writeMappings']</filter> | |
420 </data> | |
421 </outputs> | |
422 | |
423 <tests> | |
424 <test> | |
425 <param name="single_or_paired_opts" value="paired" /> | |
426 <param name="input_mate1" value="reads_1.fastq" /> | |
427 <param name="input_mate2" value="reads_2.fastq" /> | |
428 <param name="seqBias" value="False" /> | |
429 <param name="TranscriptSource" value="history" /> | |
430 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
431 <output name="output_quant"> | |
432 <assert_contents> | |
433 <has_text text="EffectiveLength" /> | |
434 <has_text text="TPM" /> | |
435 <has_text text="NM_001168316" /> | |
436 <has_text text="NM_174914" /> | |
437 <has_text text="NM_018953" /> | |
438 <has_text text="NR_003084" /> | |
439 <has_text text="NM_017410" /> | |
440 <has_text text="NM_153693" /> | |
441 <has_text text="NR_031764" /> | |
442 <has_n_columns n="5" /> | |
443 </assert_contents> | |
444 </output> | |
445 </test> | |
446 <test> <!--test use of built-in index--> | |
447 <param name="single_or_paired_opts" value="paired" /> | |
448 <param name="input_mate1" value="reads_1.fastq" /> | |
449 <param name="input_mate2" value="reads_2.fastq" /> | |
450 <param name="seqBias" value="False" /> | |
451 <param name="TranscriptSource" value="indexed" /> | |
452 <param name="index" value="hg19_transcript_subset" /> | |
453 <output name="output_quant"> | |
454 <assert_contents> | |
455 <has_text text="EffectiveLength" /> | |
456 <has_text text="TPM" /> | |
457 <has_text text="NM_001168316" /> | |
458 <has_text text="NM_174914" /> | |
459 <has_text text="NM_018953" /> | |
460 <has_text text="NR_003084" /> | |
461 <has_text text="NM_017410" /> | |
462 <has_text text="NM_153693" /> | |
463 <has_text text="NR_031764" /> | |
464 <has_n_columns n="5" /> | |
465 </assert_contents> | |
466 </output> | |
467 </test> | |
468 <test> <!-- gzipped input --> | |
469 <param name="single_or_paired_opts" value="paired" /> | |
470 <param name="input_mate1" value="reads_1.fastq.gz" ftype="fastqsanger.gz" /> | |
471 <param name="input_mate2" value="reads_2.fastq.gz" ftype="fastqsanger.gz" /> | |
472 <param name="seqBias" value="False" /> | |
473 <param name="TranscriptSource" value="history" /> | |
474 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
475 <output name="output_quant"> | |
476 <assert_contents> | |
477 <has_text text="EffectiveLength" /> | |
478 <has_text text="TPM" /> | |
479 <has_text text="NM_001168316" /> | |
480 <has_text text="NM_174914" /> | |
481 <has_text text="NM_018953" /> | |
482 <has_text text="NR_003084" /> | |
483 <has_text text="NM_017410" /> | |
484 <has_text text="NM_153693" /> | |
485 <has_text text="NR_031764" /> | |
486 <has_n_columns n="5" /> | |
487 </assert_contents> | |
488 </output> | |
489 </test> | |
490 <test> <!-- bzipped input --> | |
491 <param name="single_or_paired_opts" value="paired" /> | |
492 <param name="input_mate1" value="reads_1.fastq.bz2" ftype="fastqsanger.bz2" /> | |
493 <param name="input_mate2" value="reads_2.fastq.bz2" ftype="fastqsanger.bz2" /> | |
494 <param name="seqBias" value="False" /> | |
495 <param name="TranscriptSource" value="history" /> | |
496 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
497 <output name="output_quant"> | |
498 <assert_contents> | |
499 <has_text text="EffectiveLength" /> | |
500 <has_text text="TPM" /> | |
501 <has_text text="NM_001168316" /> | |
502 <has_text text="NM_174914" /> | |
503 <has_text text="NM_018953" /> | |
504 <has_text text="NR_003084" /> | |
505 <has_text text="NM_017410" /> | |
506 <has_text text="NM_153693" /> | |
507 <has_text text="NR_031764" /> | |
508 <has_n_columns n="5" /> | |
509 </assert_contents> | |
510 </output> | |
511 </test> | |
512 <test> <!-- interleaved bz2 input --> | |
513 <param name="single_or_paired_opts" value="paired_interleaved" /> | |
514 <param name="input_1" value="reads_both.fastq.bz2" ftype="fastqsanger.bz2" /> | |
515 <param name="seqBias" value="False" /> | |
516 <param name="TranscriptSource" value="history" /> | |
517 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
518 <output name="output_quant"> | |
519 <assert_contents> | |
520 <has_text text="EffectiveLength" /> | |
521 <has_text text="TPM" /> | |
522 <has_text text="NM_001168316" /> | |
523 <has_text text="NM_174914" /> | |
524 <has_text text="NM_018953" /> | |
525 <has_text text="NR_003084" /> | |
526 <has_text text="NM_017410" /> | |
527 <has_text text="NM_153693" /> | |
528 <has_text text="NR_031764" /> | |
529 <has_n_columns n="5" /> | |
530 </assert_contents> | |
531 </output> | |
532 </test> | |
533 <test> <!-- interleaved gz input --> | |
534 <param name="single_or_paired_opts" value="paired_interleaved" /> | |
535 <param name="input_1" value="reads_both.fastq.gz" ftype="fastqsanger.gz" /> | |
536 <param name="seqBias" value="False" /> | |
537 <param name="TranscriptSource" value="history" /> | |
538 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
539 <output name="output_quant"> | |
540 <assert_contents> | |
541 <has_text text="EffectiveLength" /> | |
542 <has_text text="TPM" /> | |
543 <has_text text="NM_001168316" /> | |
544 <has_text text="NM_174914" /> | |
545 <has_text text="NM_018953" /> | |
546 <has_text text="NR_003084" /> | |
547 <has_text text="NM_017410" /> | |
548 <has_text text="NM_153693" /> | |
549 <has_text text="NR_031764" /> | |
550 <has_n_columns n="5" /> | |
551 </assert_contents> | |
552 </output> | |
553 </test> | |
554 <test> <!-- paired input (separate mate files) with gene map; checks transcript- and gene-level output --> | |
555 <param name="single_or_paired_opts" value="paired" /> | |
556 <param name="input_mate1" value="reads_1.fastq" /> | |
557 <param name="input_mate2" value="reads_2.fastq" /> | |
558 <param name="TranscriptSource" value="history" /> | |
559 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
560 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
561 <output name="output_quant"> | |
562 <assert_contents> | |
563 <has_text text="EffectiveLength" /> | |
564 <has_text text="TPM" /> | |
565 <has_text text="NM_001168316" /> | |
566 <has_text text="NM_174914" /> | |
567 <has_text text="NM_018953" /> | |
568 <has_text text="NR_003084" /> | |
569 <has_text text="NM_017410" /> | |
570 <has_text text="NM_153693" /> | |
571 <has_text text="NR_031764" /> | |
572 <has_n_columns n="5" /> | |
573 </assert_contents> | |
574 </output> | |
575 <output name="output_gene_quant"> | |
576 <assert_contents> | |
577 <has_text text="EffectiveLength" /> | |
578 <has_text text="TPM" /> | |
579 <has_text text="baz" /> | |
580 <has_text text="bar" /> | |
581 <has_text text="2283" /> | |
582 <has_text text="1640" /> | |
583 <has_n_columns n="5" /> | |
584 </assert_contents> | |
585 </output> | |
586 </test> | |
587 <test> <!-- paired collection input with gene map; checks transcript- and gene-level output --> | |
588 <param name="single_or_paired_opts" value="paired_collection" /> | |
589 <param name="input_1"> | |
590 <collection type="paired"> | |
591 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" /> | |
592 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" /> | |
593 </collection> | |
594 </param> | |
595 <param name="TranscriptSource" value="history" /> | |
596 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
597 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
598 <output name="output_quant"> | |
599 <assert_contents> | |
600 <has_text text="EffectiveLength" /> | |
601 <has_text text="TPM" /> | |
602 <has_text text="NM_001168316" /> | |
603 <has_text text="NM_174914" /> | |
604 <has_text text="NM_018953" /> | |
605 <has_text text="NR_003084" /> | |
606 <has_text text="NM_017410" /> | |
607 <has_text text="NM_153693" /> | |
608 <has_text text="NR_031764" /> | |
609 <has_n_columns n="5" /> | |
610 </assert_contents> | |
611 </output> | |
612 <output name="output_gene_quant"> | |
613 <assert_contents> | |
614 <has_text text="EffectiveLength" /> | |
615 <has_text text="TPM" /> | |
616 <has_text text="baz" /> | |
617 <has_text text="bar" /> | |
618 <has_text text="2283" /> | |
619 <has_text text="1640" /> | |
620 <has_n_columns n="5" /> | |
621 </assert_contents> | |
622 </output> | |
623 </test> | |
624 <test> <!-- paired input (separate mate files) with gene map and validateMappings enabled --> | |
625 <param name="single_or_paired_opts" value="paired" /> | |
626 <param name="input_mate1" value="reads_1.fastq" /> | |
627 <param name="input_mate2" value="reads_2.fastq" /> | |
628 <param name="TranscriptSource" value="history" /> | |
629 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
630 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
631 <param name="validateMappings" value="True" /> | |
632 <output name="output_quant"> | |
633 <assert_contents> | |
634 <has_text text="EffectiveLength" /> | |
635 <has_text text="TPM" /> | |
636 <has_text text="NM_001168316" /> | |
637 <has_text text="NM_174914" /> | |
638 <has_text text="NM_018953" /> | |
639 <has_text text="NR_003084" /> | |
640 <has_text text="NM_017410" /> | |
641 <has_text text="NM_153693" /> | |
642 <has_text text="NR_031764" /> | |
643 <has_n_columns n="5" /> | |
644 </assert_contents> | |
645 </output> | |
646 <output name="output_gene_quant"> | |
647 <assert_contents> | |
648 <has_text text="EffectiveLength" /> | |
649 <has_text text="TPM" /> | |
650 <has_text text="baz" /> | |
651 <has_text text="bar" /> | |
652 <has_text text="2283" /> | |
653 <has_text text="1640" /> | |
654 <has_n_columns n="5" /> | |
655 </assert_contents> | |
656 </output> | |
657 </test> | |
658 <test> <!-- paired collection input with gene map and validateMappings enabled --> | |
659 <param name="single_or_paired_opts" value="paired_collection" /> | |
660 <param name="input_1"> | |
661 <collection type="paired"> | |
662 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" /> | |
663 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" /> | |
664 </collection> | |
665 </param> | |
666 <param name="TranscriptSource" value="history" /> | |
667 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
668 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
669 <param name="validateMappings" value="True" /> | |
670 <output name="output_quant"> | |
671 <assert_contents> | |
672 <has_text text="EffectiveLength" /> | |
673 <has_text text="TPM" /> | |
674 <has_text text="NM_001168316" /> | |
675 <has_text text="NM_174914" /> | |
676 <has_text text="NM_018953" /> | |
677 <has_text text="NR_003084" /> | |
678 <has_text text="NM_017410" /> | |
679 <has_text text="NM_153693" /> | |
680 <has_text text="NR_031764" /> | |
681 <has_n_columns n="5" /> | |
682 </assert_contents> | |
683 </output> | |
684 <output name="output_gene_quant"> | |
685 <assert_contents> | |
686 <has_text text="EffectiveLength" /> | |
687 <has_text text="TPM" /> | |
688 <has_text text="baz" /> | |
689 <has_text text="bar" /> | |
690 <has_text text="2283" /> | |
691 <has_text text="1640" /> | |
692 <has_n_columns n="5" /> | |
693 </assert_contents> | |
694 </output> | |
695 </test> | |
696 <test> <!-- paired input (separate mate files) with gene map and useEM enabled --> | |
697 <param name="single_or_paired_opts" value="paired" /> | |
698 <param name="input_mate1" value="reads_1.fastq" /> | |
699 <param name="input_mate2" value="reads_2.fastq" /> | |
700 <param name="TranscriptSource" value="history" /> | |
701 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
702 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
703 <param name="useEM" value="True" /> | |
704 <output name="output_quant"> | |
705 <assert_contents> | |
706 <has_text text="EffectiveLength" /> | |
707 <has_text text="TPM" /> | |
708 <has_text text="NM_001168316" /> | |
709 <has_text text="NM_174914" /> | |
710 <has_text text="NM_018953" /> | |
711 <has_text text="NR_003084" /> | |
712 <has_text text="NM_017410" /> | |
713 <has_text text="NM_153693" /> | |
714 <has_text text="NR_031764" /> | |
715 <has_n_columns n="5" /> | |
716 </assert_contents> | |
717 </output> | |
718 <output name="output_gene_quant"> | |
719 <assert_contents> | |
720 <has_text text="EffectiveLength" /> | |
721 <has_text text="TPM" /> | |
722 <has_text text="baz" /> | |
723 <has_text text="bar" /> | |
724 <has_text text="2283" /> | |
725 <has_text text="1640" /> | |
726 <has_n_columns n="5" /> | |
727 </assert_contents> | |
728 </output> | |
729 </test> | |
730 <test> <!-- paired collection input with gene map and useEM enabled --> | |
731 <param name="single_or_paired_opts" value="paired_collection" /> | |
732 <param name="input_1"> | |
733 <collection type="paired"> | |
734 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" /> | |
735 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" /> | |
736 </collection> | |
737 </param> | |
738 <param name="TranscriptSource" value="history" /> | |
739 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
740 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
741 <param name="useEM" value="True" /> | |
742 <output name="output_quant"> | |
743 <assert_contents> | |
744 <has_text text="EffectiveLength" /> | |
745 <has_text text="TPM" /> | |
746 <has_text text="NM_001168316" /> | |
747 <has_text text="NM_174914" /> | |
748 <has_text text="NM_018953" /> | |
749 <has_text text="NR_003084" /> | |
750 <has_text text="NM_017410" /> | |
751 <has_text text="NM_153693" /> | |
752 <has_text text="NR_031764" /> | |
753 <has_n_columns n="5" /> | |
754 </assert_contents> | |
755 </output> | |
756 <output name="output_gene_quant"> | |
757 <assert_contents> | |
758 <has_text text="EffectiveLength" /> | |
759 <has_text text="TPM" /> | |
760 <has_text text="baz" /> | |
761 <has_text text="bar" /> | |
762 <has_text text="2283" /> | |
763 <has_text text="1640" /> | |
764 <has_n_columns n="5" /> | |
765 </assert_contents> | |
766 </output> | |
767 </test> | |
768 </tests> | |
769 | |
770 <help><![CDATA[ | |
771 **What it does** | |
772 Salmon is a tool for transcript quantification from RNA-seq data. It | |
773 requires a set of target transcripts (either from a reference or de-novo | |
774 assembly) to quantify. All you need to run Salmon is a fasta file containing | |
775 your reference transcripts and a (set of) fasta/fastq file(s) containing your | |
776 reads. Salmon runs in two phases; indexing and quantification. The indexing | |
777 step is independent of the reads, and only needs to be run once for a particular | |
778 set of reference transcripts and choice of k (the k-mer size). The | |
779 quantification step, obviously, is specific to the set of RNA-seq reads and is | |
780 thus run more frequently. | |
781 The quantification output contains a number of columns: | |
782 (1) Transcript ID, | |
783 (2) Transcript Length, (3) Effective Length, | |
784 (4) Transcripts per Million (TPM) and | |
785 (5) Estimated number of reads (an estimate of the number of reads drawn from this transcript given the transcript’s relative abundance and length). | |
786 The first two columns are self-explanatory, the third is the transcript's effective length, the fourth is a measure of transcript abundance and the final is a commonly used input for differential expression tools. | |
787 The Transcripts per Million quantification number is computed as described in [1], and is meant as an estimate of the number of transcripts, per million observed transcripts, | |
788 originating from each isoform. Its benefit over the F/RPKM measure is that it is independent of the mean expressed transcript length | |
789 (i.e. if the mean expressed transcript length varies between samples, for example, this alone can affect differential analysis based on the F/RPKM). | |
790 | |
791 | |
792 Fragment Library Types | |
793 ====================== | |
794 | |
795 There are numerous library preparation protocols for RNA-seq that result in | |
796 sequencing reads with different characteristics. For example, reads can be | |
797 single end (only one side of a fragment is recorded as a read) or paired-end | |
798 (reads are generated from both ends of a fragment). Further, the sequencing | |
799 reads themselves may be unstranded or strand-specific. Finally, paired-end | |
800 protocols will have a specified relative orientation. To characterize the | |
801 various different types of sequencing libraries, we've created a miniature | |
802 "language" that allows for the succinct description of the many different types | |
803 of possible fragment libraries. For paired-end reads, the possible | |
804 orientations, along with a graphical description of what they mean, are | |
805 illustrated below: | |
806 .. image:: ReadLibraryIllustration.png | |
807 The library type string consists of three parts: the relative orientation of | |
808 the reads, the strandedness of the library, and the directionality of the | |
809 reads. | |
810 The first part of the library string (relative orientation) is only provided if | |
811 the library is paired-end. The possible options are: | |
812 :: | |
813 | |
814 I = inward | |
815 O = outward | |
816 M = matching | |
817 | |
818 The second part of the read library string specifies whether the protocol is | |
819 stranded or unstranded; the options are: | |
820 :: | |
821 | |
822 S = stranded | |
823 U = unstranded | |
824 | |
825 If the protocol is unstranded, then we're done. The final part of the library | |
826 string specifies the strand from which the read originates in a strand-specific | |
827 protocol — it is only provided if the library is stranded (i.e. if the | |
828 library format string is of the form S). The possible values are: | |
829 :: | |
830 | |
831 F = read 1 (or single-end read) comes from the forward strand | |
832 R = read 1 (or single-end read) comes from the reverse strand | |
833 | |
834 So, for example, if you wanted to specify a fragment library of strand-specific | |
835 paired-end reads, oriented toward each other, where read 1 comes from the | |
836 forward strand and read 2 comes from the reverse strand, you would specify ``-l | |
837 ISF`` on the command line. This designates that the library being processed has | |
838 the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranded | |
839 (the protocol is strand-specific), **F**\ orward (read 1 comes from the forward | |
840 strand). | |
841 The single-end library strings are a bit simpler than their paired-end | |
842 counterparts, since there is no relative orientation of which to speak. Thus, the | |
843 only possible library format types for single-end reads are ``U`` (for | |
844 unstranded), ``SF`` (for strand-specific reads coming from the forward strand) | |
845 and ``SR`` (for strand-specific reads coming from the reverse strand). | |
846 A few more examples of some library format strings and their interpretations are: | |
847 :: | |
848 | |
849 IU (an unstranded paired-end library where the reads face each other) | |
850 | |
851 :: | |
852 | |
853 SF (a stranded single-end protocol where the reads come from the forward strand) | |
854 | |
855 :: | |
856 | |
857 OSR (a stranded paired-end protocol where the reads face away from each other, | |
858 read1 comes from reverse strand and read2 comes from the forward strand) | |
859 | |
860 .. note:: Correspondence to TopHat library types | |
861 | |
862 The popular `TopHat <http://ccb.jhu.edu/software/tophat/index.shtml>`_ RNA-seq | |
863 read aligner has a different convention for specifying the format of the library. | |
864 Below is a table that provides the corresponding Salmon/Sailfish library format | |
865 string for each of the potential TopHat library types: | |
866 | |
867 +---------------------+-------------------------+ | |
868 | TopHat | Salmon (and Sailfish) | | |
869 +=====================+============+============+ | |
870 | | Paired-end | Single-end | | |
871 +---------------------+------------+------------+ | |
872 |``-fr-unstranded`` |``-l IU`` |``-l U`` | | |
873 +---------------------+------------+------------+ | |
874 |``-fr-firststrand`` |``-l ISR`` |``-l SR`` | | |
875 +---------------------+------------+------------+ | |
876 |``-fr-secondstrand`` |``-l ISF`` |``-l SF`` | | |
877 +---------------------+------------+------------+ | |
878 | |
879 The remaining salmon library format strings are not directly expressible in terms | |
880 of the TopHat library types, and so there is no direct mapping for them. | |
881 ]]> </help> | |
882 <citations> | |
883 <citation type="doi">10.1101/021592</citation> | |
884 </citations> | |
885 </tool> |