comparison old/salmon.xml @ 11:666bb48b1007 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 02087ce2966cf8b4aac9197a41171e7f986c11d1-dirty"
author bgruening
date Wed, 02 Oct 2019 04:31:53 -0400
parents
children
comparison
equal deleted inserted replaced
10:4de6e2e40c7a 11:666bb48b1007
1 <tool id="salmon" name="Salmon" version="@VERSION@">
2
3 <description>Transcript Quantification from RNA-seq data</description>
4
5 <macros>
6 <xml name="strandedness">
7 <param name="strandedness" type="select" label="Specify the strandedness of the reads">
8 <option value="U" selected="True">Not stranded (U)</option>
9 <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option>
10 <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option>
11 </param>
12 </xml> <!-- strandedness: expanded in every single_or_paired branch; its value is used in the libType argument -->
13 <xml name="orientation">
14 <param name="orientation" type="select" label="Relative orientation of reads within a pair">
15 <option value="M">Mates are oriented in the same direction (M = matching)</option>
16 <option value="O">Mates are oriented away from each other (O = outward)</option>
17 <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option>
18 </param>
19 </xml> <!-- orientation: expanded in the three paired branches; prefixed to strandedness to form the libType argument -->
20 <token name="@VERSION@">0.11.2</token> <!-- used as the tool version and as the salmon requirement version -->
21 <token name="@IDX_VERSION@">q5</token> <!-- static filter value applied to column 4 of the built-in index table -->
22 </macros>
23
24 <requirements>
25 <requirement type="package" version="1.0.6">bzip2</requirement> <!-- the command calls bzcat for .bz2 reads -->
26 <requirement type="package" version="@VERSION@">salmon</requirement> <!-- version is taken from the @VERSION@ token -->
27 <requirement type="package" version="1.2">seqtk</requirement> <!-- the command calls seqtk seq to split interleaved reads into two mate streams -->
28 </requirements>
29
30 <stdio> <!-- job-result checks: exit-code ranges and text patterns -->
31 <exit_code range="1:" /> <!-- any exit code of 1 or above -->
32 <exit_code range=":-1" /> <!-- any exit code of -1 or below -->
33 <regex match="Error:" />
34 <regex match="Exception:" />
35 <regex match="Exception :" /> <!-- same pattern with a space before the colon -->
36 </stdio>
37 <version_command>salmon --version</version_command> <!-- long-form version flag as documented by salmon -->
38 <command><![CDATA[
39 mkdir ./index ## target directory for an index built from a history FASTA
40 &&
41 mkdir ./output ## salmon quant writes its results here
42 #if $refTranscriptSource.TranscriptSource == "history":
43 &&
44 salmon index ## build the index in the job directory
45 --transcripts '$refTranscriptSource.ownFile'
46 --kmerLen $refTranscriptSource.kmerLen
47 --threads "\${GALAXY_SLOTS:-4}"
48 --index './index'
49 --type '$quasi_orphans.type'
50 $perfectHash
51 #set $index_path = './index'
52 #else:
53 #set $index_path = $refTranscriptSource.index.fields.path
54 #end if
55 &&
56 #set compressed = 'no'
57 #if $single_or_paired.single_or_paired_opts == 'single':
58 #if $single_or_paired.input_singles.ext == 'fasta':
59 #set $ext = 'fasta'
60 #else:
61 #if $single_or_paired.input_singles.is_of_type("fastq.gz", "fastqsanger.gz"):
62 #set compressed = 'GZ'
63 #else if $single_or_paired.input_singles.is_of_type("fastq.bz2", "fastqsanger.bz2"):
64 #set compressed = 'BZ2'
65 #end if
66 #set $ext = 'fastq'
67 #end if
68 ln -s '$single_or_paired.input_singles' ./single.$ext && ## link the dataset under a name carrying the detected extension
69 #else if $single_or_paired.single_or_paired_opts == 'paired':
70 #if $single_or_paired.input_mate1.ext == 'fasta':
71 #set $ext = 'fasta'
72 #else:
73 #if $single_or_paired.input_mate1.is_of_type("fastq.gz", "fastqsanger.gz"):
74 #set compressed = 'GZ'
75 #else if $single_or_paired.input_mate1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
76 #set compressed = 'BZ2'
77 #end if
78 #set $ext = 'fastq'
79 #end if
80 ln -s '$single_or_paired.input_mate1' ./mate1.$ext && ## format and compression are decided from mate 1 only
81 ln -s '$single_or_paired.input_mate2' ./mate2.$ext &&
82 #else if $single_or_paired.single_or_paired_opts == 'paired_collection':
83 #if $single_or_paired.input_1.forward.ext == 'fasta':
84 #set $ext = 'fasta'
85 #else:
86 #if $single_or_paired.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
87 #set compressed = 'GZ'
88 #else if $single_or_paired.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
89 #set compressed = 'BZ2'
90 #end if
91 #set $ext = 'fastq'
92 #end if
93 ln -s '${single_or_paired.input_1.forward}' ./mate1.$ext && ## format and compression are decided from the forward element only
94 ln -s '${single_or_paired.input_1.reverse}' ./mate2.$ext &&
95 #else if $single_or_paired.single_or_paired_opts == 'paired_interleaved':
96 #if $single_or_paired.input_1.ext == 'fasta':
97 #set $ext = 'fasta'
98 #else:
99 #if $single_or_paired.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
100 #set compressed = 'GZ'
101 #else if $single_or_paired.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
102 #set compressed = 'BZ2'
103 #end if
104 #set $ext = 'fastq'
105 #end if
106 ln -s '$single_or_paired.input_1' ./mate1.$ext && ## one interleaved file; it is split into mates when salmon quant is invoked
107 #end if
108 #if $geneMap:
109 ln -s '$geneMap' ./geneMap.${geneMap.ext} && ## keep the original extension on the linked gene map
110 #end if
111 salmon quant ## quantify against the index chosen or built earlier in this command
112 --index '$index_path'
113 #if $single_or_paired.single_or_paired_opts == 'single':
114 --libType ${single_or_paired.strandedness}
115 #if $compressed == 'GZ':
116 --unmatedReads <(zcat < ./single.$ext)
117 #else if $compressed == 'BZ2':
118 --unmatedReads <(bzcat < ./single.$ext)
119 #else:
120 --unmatedReads ./single.$ext
121 #end if
122 #else:
123 --libType "${single_or_paired.orientation}${single_or_paired.strandedness}"
124 #if $single_or_paired.single_or_paired_opts == 'paired_interleaved':
125 #if $compressed == 'BZ2':
126 --mates1 <(bzcat < ./mate1.$ext | seqtk seq -1) ## decompress, then keep the first read of each pair
127 --mates2 <(bzcat < ./mate1.$ext | seqtk seq -2) ## decompress, then keep the second read of each pair
128 #else:
129 --mates1 <(seqtk seq -1 ./mate1.$ext) ## every non-BZ2 input, GZ included, is handed to seqtk as-is
130 --mates2 <(seqtk seq -2 ./mate1.$ext)
131 #end if
132 #else:
133 #if $compressed == 'GZ':
134 --mates1 <(zcat < ./mate1.$ext)
135 --mates2 <(zcat < ./mate2.$ext)
136 #else if $compressed == 'BZ2':
137 --mates1 <(bzcat < ./mate1.$ext)
138 --mates2 <(bzcat < ./mate2.$ext)
139 #else:
140 --mates1 ./mate1.$ext
141 --mates2 ./mate2.$ext
142 #end if
143 #end if
144 #end if
145 --output ./output
146 #if str($quasi_orphans.type) == 'quasi':
147 --allowOrphans
148 $quasi_orphans.validateMappings
149 --ma $quasi_orphans.matchScore
150 --mp $quasi_orphans.mismatchPenalty
151 --go $quasi_orphans.gapOpenPenalty
152 --ge $quasi_orphans.gapExtensionPenalty
153 --minScoreFraction $quasi_orphans.minScoreFraction
154 #end if
155 $seqBias
156 $gcBias
157 $noErrorModel
158 --threads "\${GALAXY_SLOTS:-4}"
159 --incompatPrior $adv.incompatPrior
160 $adv.consistentHits
161 $adv.dumpEq
162 $adv.reduceGCMemory
163 #if str($adv.biasSpeedSamp):
164 --biasSpeedSamp $adv.biasSpeedSamp
165 #end if
166 $adv.strictIntersect
167 #if str($adv.fldMax):
168 --fldMax $adv.fldMax
169 #end if
170 #if str($adv.fldMean):
171 --fldMean $adv.fldMean
172 #end if
173 #if str($adv.fldSD):
174 --fldSD $adv.fldSD
175 #end if
176 #if str($adv.forgettingFactor):
177 --forgettingFactor $adv.forgettingFactor ## guarded on the string form so an explicit 0 is passed through, not dropped
178 #end if
179 $adv.initUniform
180 $adv.noFragLengthDist
181 $adv.noBiasLengthThreshold
182 #if str($adv.maxReadOcc):
183 --maxReadOcc $adv.maxReadOcc
184 #end if
185 #if $geneMap:
186 --geneMap ./geneMap.${geneMap.ext}
187 #end if
188 $adv.noEffectiveLengthCorrection
189 $adv.useEM
190 #if str($adv.numBiasSamples):
191 --numBiasSamples $adv.numBiasSamples
192 #end if
193 #if str($adv.numAuxModelSamples):
194 --numAuxModelSamples $adv.numAuxModelSamples
195 #end if
196 #if str($adv.numPreAuxModelSamples):
197 --numPreAuxModelSamples $adv.numPreAuxModelSamples
198 #end if
199 #if str($adv.numGibbsSamples):
200 --numGibbsSamples $adv.numGibbsSamples
201 #end if
202 #if str($adv.numBootstraps):
203 --numBootstraps $adv.numBootstraps
204 #end if
205 #if str($adv.consensusSlack):
206 --consensusSlack $adv.consensusSlack
207 #else:
208 #if $quasi_orphans.validateMappings:
209 --consensusSlack 1
210 #else:
211 --consensusSlack 0
212 #end if
213 #end if
214 $adv.perTranscriptPrior
215 #if str($adv.vbPrior):
216 --vbPrior $adv.vbPrior ## guarded on the string form so an explicit 0 is passed through, not dropped
217 #end if
218 $adv.writeUnmappedNames
219 --sigDigits $adv.sigDigits
220 #if str($adv.writeMappings):
221 $adv.writeMappings > '${output_sam}' ## mappings go to stdout and are redirected into the SAM output dataset
222 #end if
223 ]]>
224 </command>
225
226 <inputs>
227 <conditional name="refTranscriptSource"> <!-- where the index comes from: a built-in data table entry or a FASTA indexed inside the job -->
228 <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options">
229 <option value="indexed">Use a built-in index</option>
230 <option value="history" selected="True">Use one from the history</option>
231 </param>
232 <when value="indexed">
233 <param name="index" type="select" label="Select a reference transcriptome" help="If your transcriptome of interest is not listed, contact your Galaxy admin">
234 <options from_data_table="salmon_indexes_versioned">
235 <filter type="sort_by" column="2"/>
236 <filter type="static_value" column="4" value="@IDX_VERSION@" />
237 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
238 </options>
239 </param>
240 </when> <!-- built-in -->
241 <when value="history">
242 <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" />
243 <param argument="--kmerLen" type="integer" value="31" label="k-mer length" help="The size of the k-mers used to build the index. The size should be an odd number."/>
244 </when> <!-- history -->
245 </conditional>
246 <conditional name="single_or_paired"> <!-- how the reads are supplied; every branch also exposes the library-type selectors from the macros -->
247 <param name="single_or_paired_opts" type="select" label="Is this library mate-paired?">
248 <option value="single">Single-end</option>
249 <option value="paired">Paired-end</option>
250 <option value="paired_collection">Paired-end Dataset Collection</option>
251 <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
252 </param>
253 <when value="single">
254 <param name="input_singles" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="FASTQ/FASTA file" help="FASTQ file." />
255 <expand macro="strandedness" />
256 </when>
257 <when value="paired">
258 <param name="input_mate1" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 1" help="FASTQ file." />
259 <param name="input_mate2" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 2" help="FASTQ file." />
260 <expand macro="orientation" />
261 <expand macro="strandedness" />
262 </when>
263 <when value="paired_collection">
264 <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
265 <expand macro="orientation" />
266 <expand macro="strandedness" />
267 </when>
268 <when value="paired_interleaved">
269 <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
270 <expand macro="orientation" />
271 <expand macro="strandedness" />
272 </when>
273 </conditional>
274 <conditional name="quasi_orphans"> <!-- index type plus the alignment-scoring options that the command emits for the quasi type -->
275 <param argument="--type" type="select" label="Type of index" help="When using quasi, orphaned reads will be considered when performing lightweight-alignment.">
276 <option value="quasi" selected="True">quasi</option>
277 </param>
278 <when value="quasi">
279 <param argument="--validateMappings" type="boolean" truevalue="--validateMappings" falsevalue="" checked="False"
280 label="Validate mappings"
281 help="Validate mappings using alignment-based verification. If this flag is passed, quasi-mappings will be validated to ensure that they could give rise to a reasonable alignment before they are further used for quantification."/>
282 <param name="matchScore" argument="--ma" type="integer" value="2"
283 label="Match Score"
284 help="The value given to a match between read and reference nucleotides in an alignment."/>
285 <param name="mismatchPenalty" argument="--mp" type="integer" value="4"
286 label="Mismatch Penalty"
287 help="The value given to a mis-match between read and reference nucleotides in an alignment. This will be cast to a negative value."/>
288 <param name="gapOpenPenalty" argument="--go" type="integer" value="5"
289 label="Gap Open Penalty"
290 help="The value given to a gap opening in an alignment."/>
291 <param name="gapExtensionPenalty" argument="--ge" type="integer" value="3"
292 label="Gap Extension Penalty"
293 help="The value given to a gap extension in an alignment."/>
294 <param argument="--minScoreFraction" type="float" value="0.65" min="0.0" max="1.0"
295 label="Min Score Fraction"
296 help="The fraction of the optimal possible alignment score that a mapping must achieve in order to be considered valid. Should be in (0,1]."/>
297 </when> <!-- quasi -->
298 </conditional>
299 <param argument="--perfectHash" type="boolean" truevalue="--perfectHash" falsevalue="" checked="False"
300 label="Perfect Hash"
301 help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), but will take longer to construct."/> <!-- emitted on the salmon index command line only -->
302 <param argument="--seqBias" type="boolean" truevalue="--seqBias" falsevalue="" checked="False"
303 label="Perform sequence-specific bias correction"
304 help=""/>
305 <param argument="--gcBias" type="boolean" truevalue="--gcBias" falsevalue="" checked="False"
306 label="Perform fragment GC bias correction"
307 help=""/>
308 <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True"
309 label="File containing a mapping of transcripts to genes"
310 help="If this file is provided Salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab." /> <!-- also controls whether the gene-level output dataset is created -->
311 <param argument="--noErrorModel" type="boolean" truevalue="--noErrorModel" falsevalue="" checked="False"
312 label="No Error Model"
313 help="Turn off the alignment error model, which takes into account the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy."/>
314 <section name="adv" title="Additional Options"> <!-- values are read in the command template as $adv.NAME -->
315 <param argument="--writeMappings" type="boolean" truevalue="--writeMappings" falsevalue="" checked="False"
316 label="Write Mappings"
317 help="If this option is set to 'Yes', then the quasi-mapping results will be written out in SAM-compatible format. By default, output is directed to stdout." />
318 <param argument="--incompatPrior" type="float" value="9.9999999999999995e-21"
319 label="Incompatible Prior"
320 help="This option sets the prior probability that an alignment that disagrees with the specified library type (--libType) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be 'impossible', while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do" /> <!-- not optional: the command template always emits this option with its value -->
321 <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False"
322 label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/>
323 <param argument="--reduceGCMemory" type="boolean" truevalue="--reduceGCMemory" falsevalue="" optional="True" checked="False"
324 label="If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content."
325 help="Enabling this will reduce memory usage, but can also reduce speed. However, the results themselves will remain the same."/>
326 <param argument="--biasSpeedSamp" type="integer" value="1" optional="True"
327 label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/>
328 <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False"
329 label="Modifies how orphans are assigned." help="When this flag is set, if the intersection of the quasi-mappings for the left and right is empty, then all mappings for the left and all mappings for the right read are reported as orphaned quasi-mappings."/>
330 <param argument="--minLen" type="integer" value="19" optional="True"
331 label=" (S)MEMs smaller than this size won't be considered." help="" /> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
332 <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="False"
333 label="Perform sensitive quantification"
334 help=" Setting this option enables the splitting of SMEMs that are larger than 1.5 times the minimum seed length (minLen/k above). This may reveal high scoring chains of MEMs that are masked by long SMEMs. However, this option makes lightweight-alignment a bit slower and is usually not necessary if the reference is of reasonable quality." /> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
335 <param argument="--consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False"
336 label="Force hits gathered during quasi-mapping to be consistent"
337 help="" />
338 <param argument="--extraSensitive" type="boolean" truevalue="--extraSensitive" falsevalue="" checked="False"
339 label="Perform extra sensitive quantification"
340 help="Setting this option enables an extra pass of 'seed' search. Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will typically slow down quantification by ~40%. Consider enabling this option if you find the mapping rate to be significantly lower than expected."/> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
341 <param argument="--coverage" type="float" value="0.69999999999999996" optional="True"
342 label="Required coverage of read by union of SMEMs to consider it a hit"
343 help="" /> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
344 <param argument="--fldMax" type="integer" value="1000" optional="True"
345 label="The maximum fragment length to consider when building the empirical distribution."
346 help=""/>
347 <param argument="--fldMean" type="integer" value="200" optional="True"
348 label="The mean used in the fragment length distribution prior"
349 help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/>
350 <param argument="--fldSD" type="integer" value="80" optional="True"
351 label="Standard deviation"
352 help="The standard deviation used in the fragment length distribution prior."/>
353 <param argument="--forgettingFactor" type="float" value="0.65000000000000002" optional="True"
354 label="The forgetting factor used in the online learning schedule."
355 help=" A smaller value results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0]." />
356 <param argument="--initUniform" type="boolean" truevalue="--initUniform" falsevalue="" checked="False"
357 label="Initialization with uniform parameters"
358 help="initialize the offline inference with uniform parameters, rather than seeding with online parameters." />
359 <param argument="--maxReadOcc" type="integer" value="100" optional="True"
360 label="Maximal read mapping occurrence"
361 help="Reads mapping to more than this many places won't be considered."/>
362 <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False"
363 label="Disable effective length correction"
364 help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/>
365 <param argument="--noFragLengthDist" type="boolean" truevalue="--noFragLengthDist" falsevalue="" checked="False"
366 label="Ignore fragment length distribution"
367 help="[experimental] : Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability." />
368 <param argument="--noBiasLengthThreshold" type="boolean" truevalue="--noBiasLengthThreshold" falsevalue="" checked="False"
369 label="[experimental] : If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths."
370 help="This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold." />
371 <param argument="--numBiasSamples" type="integer" value="2000000" optional="True"
372 label="Number of fragment mappings to use when learning the sequence-specific bias model."
373 help="" />
374 <param argument="--numAuxModelSamples" type="integer" value="5000000" optional="True"
375 label="The first numAuxModelSamples are used to train the auxiliary model parameters."
376 help="(e.g. fragment length distribution, bias, etc.). After the first numAuxModelSamples observations the auxiliary model parameters will be assumed to have converged and will be fixed." />
377 <param argument="--numPreAuxModelSamples" type="integer" value="1000000" optional="True"
378 label="The first numPreAuxModelSamples will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models."
379 help=" The purpose of ignoring the auxiliary models for the first numPreAuxModelSamples observations is to avoid applying these models before their parameters have been learned sufficiently well." />
380 <param argument="--splitWidth" type="integer" value="0" optional="True"
381 label=" If (S)MEM occurs fewer than this many times, search for smaller, contained MEMs"
382 help="The default value will not split (S)MEMs, a higher value will result in more MEMs being explored and, thus, will result in increased running time." /> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
383 <param argument="--splitSpanningSeeds" type="boolean" truevalue="--splitSpanningSeeds" falsevalue="" checked="False"
384 label="Attempt to split seeds that happen to fall on the boundary between two transcripts."
385 help="This can improve the fragment hit-rate, but is usually not necessary."/> <!-- NOTE(review): not referenced by the command template; confirm whether it is still needed -->
386 <param argument="--useEM" type="boolean" truevalue="--useEM" falsevalue="" checked="False"
387 label="Use the traditional EM algorithm for optimization in the batch passes."
388 help=""/>
389 <param argument="--numGibbsSamples" type="integer" value="0" optional="True"
390 label=" Number of Gibbs sampling rounds to perform."
391 help="" />
392 <param argument="--numBootstraps" type="integer" value="0" optional="True"
393 label="Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling."
394 help="" />
395 <param argument="--perTranscriptPrior" type="boolean" truevalue="--perTranscriptPrior" falsevalue="" checked="False"
396 label="The prior will be interpreted as a transcript-level prior."
397 help="either the default or the argument provided via --vbPrior" />
398 <param argument="--vbPrior" type="float" value="0.001" optional="True"
399 label="The prior that will be used in the VBEM algorithm."
400 help="This is interpreted as a per-nucleotide prior, unless the --perTranscriptPrior flag is also given, in which case this is used as a transcript-level prior." />
401 <param argument="--writeUnmappedNames" type="boolean" truevalue="--writeUnmappedNames" falsevalue="" checked="False"
402 label="Write the names of un-mapped reads to the file unmapped_names.txt."
403 help=""/>
404 <param argument="--sigDigits" type="integer" value="3"
405 label="Significant Digits"
406 help="The number of significant digits to write when outputting the EffectiveLength and NumReads columns."/>
407 <param argument="--consensusSlack" type="integer" optional="True"
408 label="Consensus Slack"
409 help="The amount of slack allowed in the quasi-mapping consensus mechanism. Normally, a transcript must cover all hits to be considered for mapping. If this is set to a value, X, greater than 0, then a transcript can fail to cover up to X hits before it is discounted as a mapping candidate. The default value of this option is 1 if --validateMappings is given and 0 otherwise."/> <!-- when left blank the command template passes 1 or 0 depending on validateMappings -->
410 </section>
411 </inputs>
412
413 <outputs>
414 <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> <!-- always produced; collected from the output directory passed to salmon quant -->
415 <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)">
416 <filter>geneMap</filter> <!-- only created when a gene map dataset was supplied -->
417 </data>
418 <data name="output_sam" format="sam" label="${tool.name} on ${on_string} (SAM format)">
419 <filter>adv['writeMappings']</filter> <!-- only created when Write Mappings is enabled; the command redirects stdout into this dataset -->
420 </data>
421 </outputs>
422
423 <tests>
424 <test> <!-- paired-end reads, index built from a history FASTA -->
425 <param name="single_or_paired_opts" value="paired" />
426 <param name="input_mate1" value="reads_1.fastq" />
427 <param name="input_mate2" value="reads_2.fastq" />
428 <param name="seqBias" value="False" />
429 <param name="TranscriptSource" value="history" />
430 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
431 <output name="output_quant">
432 <assert_contents>
433 <has_text text="EffectiveLength" />
434 <has_text text="TPM" />
435 <has_text text="NM_001168316" />
436 <has_text text="NM_174914" />
437 <has_text text="NM_018953" />
438 <has_text text="NR_003084" />
439 <has_text text="NM_017410" />
440 <has_text text="NM_153693" />
441 <has_text text="NR_031764" />
442 <has_n_columns n="5" />
443 </assert_contents>
444 </output>
445 </test>
446 <test> <!--test use of built-in index-->
447 <param name="single_or_paired_opts" value="paired" />
448 <param name="input_mate1" value="reads_1.fastq" />
449 <param name="input_mate2" value="reads_2.fastq" />
450 <param name="seqBias" value="False" />
451 <param name="TranscriptSource" value="indexed" />
452 <param name="index" value="hg19_transcript_subset" />
453 <output name="output_quant">
454 <assert_contents>
455 <has_text text="EffectiveLength" />
456 <has_text text="TPM" />
457 <has_text text="NM_001168316" />
458 <has_text text="NM_174914" />
459 <has_text text="NM_018953" />
460 <has_text text="NR_003084" />
461 <has_text text="NM_017410" />
462 <has_text text="NM_153693" />
463 <has_text text="NR_031764" />
464 <has_n_columns n="5" />
465 </assert_contents>
466 </output>
467 </test>
468 <test> <!-- gzipped input -->
469 <param name="single_or_paired_opts" value="paired" />
470 <param name="input_mate1" value="reads_1.fastq.gz" ftype="fastqsanger.gz" />
471 <param name="input_mate2" value="reads_2.fastq.gz" ftype="fastqsanger.gz" />
472 <param name="seqBias" value="False" />
473 <param name="TranscriptSource" value="history" />
474 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
475 <output name="output_quant">
476 <assert_contents>
477 <has_text text="EffectiveLength" />
478 <has_text text="TPM" />
479 <has_text text="NM_001168316" />
480 <has_text text="NM_174914" />
481 <has_text text="NM_018953" />
482 <has_text text="NR_003084" />
483 <has_text text="NM_017410" />
484 <has_text text="NM_153693" />
485 <has_text text="NR_031764" />
486 <has_n_columns n="5" />
487 </assert_contents>
488 </output>
489 </test>
490 <test> <!-- bzipped input -->
491 <param name="single_or_paired_opts" value="paired" />
492 <param name="input_mate1" value="reads_1.fastq.bz2" ftype="fastqsanger.bz2" />
493 <param name="input_mate2" value="reads_2.fastq.bz2" ftype="fastqsanger.bz2" />
494 <param name="seqBias" value="False" />
495 <param name="TranscriptSource" value="history" />
496 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
497 <output name="output_quant">
498 <assert_contents>
499 <has_text text="EffectiveLength" />
500 <has_text text="TPM" />
501 <has_text text="NM_001168316" />
502 <has_text text="NM_174914" />
503 <has_text text="NM_018953" />
504 <has_text text="NR_003084" />
505 <has_text text="NM_017410" />
506 <has_text text="NM_153693" />
507 <has_text text="NR_031764" />
508 <has_n_columns n="5" />
509 </assert_contents>
510 </output>
511 </test>
512 <test> <!-- interleaved bz2 input -->
513 <param name="single_or_paired_opts" value="paired_interleaved" />
514 <param name="input_1" value="reads_both.fastq.bz2" ftype="fastqsanger.bz2" />
515 <param name="seqBias" value="False" />
516 <param name="TranscriptSource" value="history" />
517 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
518 <output name="output_quant">
519 <assert_contents>
520 <has_text text="EffectiveLength" />
521 <has_text text="TPM" />
522 <has_text text="NM_001168316" />
523 <has_text text="NM_174914" />
524 <has_text text="NM_018953" />
525 <has_text text="NR_003084" />
526 <has_text text="NM_017410" />
527 <has_text text="NM_153693" />
528 <has_text text="NR_031764" />
529 <has_n_columns n="5" />
530 </assert_contents>
531 </output>
532 </test>
533 <test> <!-- interleaved gz input -->
534 <param name="single_or_paired_opts" value="paired_interleaved" />
535 <param name="input_1" value="reads_both.fastq.gz" ftype="fastqsanger.gz" />
536 <param name="seqBias" value="False" />
537 <param name="TranscriptSource" value="history" />
538 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
539 <output name="output_quant">
540 <assert_contents>
541 <has_text text="EffectiveLength" />
542 <has_text text="TPM" />
543 <has_text text="NM_001168316" />
544 <has_text text="NM_174914" />
545 <has_text text="NM_018953" />
546 <has_text text="NR_003084" />
547 <has_text text="NM_017410" />
548 <has_text text="NM_153693" />
549 <has_text text="NR_031764" />
550 <has_n_columns n="5" />
551 </assert_contents>
552 </output>
553 </test>
554 <test> <!-- paired-end reads with a gene map; checks both transcript-level and gene-level outputs -->
555 <param name="single_or_paired_opts" value="paired" />
556 <param name="input_mate1" value="reads_1.fastq" />
557 <param name="input_mate2" value="reads_2.fastq" />
558 <param name="TranscriptSource" value="history" />
559 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
560 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
561 <output name="output_quant">
562 <assert_contents>
563 <has_text text="EffectiveLength" />
564 <has_text text="TPM" />
565 <has_text text="NM_001168316" />
566 <has_text text="NM_174914" />
567 <has_text text="NM_018953" />
568 <has_text text="NR_003084" />
569 <has_text text="NM_017410" />
570 <has_text text="NM_153693" />
571 <has_text text="NR_031764" />
572 <has_n_columns n="5" />
573 </assert_contents>
574 </output>
575 <output name="output_gene_quant"> <!-- present because geneMap is set -->
576 <assert_contents>
577 <has_text text="EffectiveLength" />
578 <has_text text="TPM" />
579 <has_text text="baz" />
580 <has_text text="bar" />
581 <has_text text="2283" />
582 <has_text text="1640" />
583 <has_n_columns n="5" />
584 </assert_contents>
585 </output>
586 </test>
587 <test> <!-- paired dataset collection with a gene map; checks both transcript-level and gene-level outputs -->
588 <param name="single_or_paired_opts" value="paired_collection" />
589 <param name="input_1">
590 <collection type="paired">
591 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
592 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
593 </collection>
594 </param>
595 <param name="TranscriptSource" value="history" />
596 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
597 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
598 <output name="output_quant">
599 <assert_contents>
600 <has_text text="EffectiveLength" />
601 <has_text text="TPM" />
602 <has_text text="NM_001168316" />
603 <has_text text="NM_174914" />
604 <has_text text="NM_018953" />
605 <has_text text="NR_003084" />
606 <has_text text="NM_017410" />
607 <has_text text="NM_153693" />
608 <has_text text="NR_031764" />
609 <has_n_columns n="5" />
610 </assert_contents>
611 </output>
612 <output name="output_gene_quant"> <!-- present because geneMap is set -->
613 <assert_contents>
614 <has_text text="EffectiveLength" />
615 <has_text text="TPM" />
616 <has_text text="baz" />
617 <has_text text="bar" />
618 <has_text text="2283" />
619 <has_text text="1640" />
620 <has_n_columns n="5" />
621 </assert_contents>
622 </output>
623 </test>
624 <test>
<!-- Two separate mate datasets (input_mate1 / input_mate2) with the transcript
     FASTA taken from the history, a gene map, and validateMappings set to
     True. The assertions are the same as in the tests that leave this
     parameter unset. -->
625 <param name="single_or_paired_opts" value="paired" />
626 <param name="input_mate1" value="reads_1.fastq" />
627 <param name="input_mate2" value="reads_2.fastq" />
628 <param name="TranscriptSource" value="history" />
629 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
630 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
631 <param name="validateMappings" value="True" />
<!-- output_quant: expected text, transcript identifiers and 5 columns. -->
632 <output name="output_quant">
633 <assert_contents>
634 <has_text text="EffectiveLength" />
635 <has_text text="TPM" />
636 <has_text text="NM_001168316" />
637 <has_text text="NM_174914" />
638 <has_text text="NM_018953" />
639 <has_text text="NR_003084" />
640 <has_text text="NM_017410" />
641 <has_text text="NM_153693" />
642 <has_text text="NR_031764" />
643 <has_n_columns n="5" />
644 </assert_contents>
645 </output>
<!-- output_gene_quant: expected text, the names "baz"/"bar", the values
     2283/1640 and 5 columns. -->
646 <output name="output_gene_quant">
647 <assert_contents>
648 <has_text text="EffectiveLength" />
649 <has_text text="TPM" />
650 <has_text text="baz" />
651 <has_text text="bar" />
652 <has_text text="2283" />
653 <has_text text="1640" />
654 <has_n_columns n="5" />
655 </assert_contents>
656 </output>
657 </test>
658 <test>
<!-- Paired collection input (forward/reverse fastqsanger elements) with the
     transcript FASTA taken from the history, a gene map, and validateMappings
     set to True. This is the collection counterpart of the separate-mates
     validateMappings test. -->
659 <param name="single_or_paired_opts" value="paired_collection" />
660 <param name="input_1">
661 <collection type="paired">
662 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
663 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
664 </collection>
665 </param>
666 <param name="TranscriptSource" value="history" />
667 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
668 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
669 <param name="validateMappings" value="True" />
<!-- output_quant: expected text, transcript identifiers and 5 columns. -->
670 <output name="output_quant">
671 <assert_contents>
672 <has_text text="EffectiveLength" />
673 <has_text text="TPM" />
674 <has_text text="NM_001168316" />
675 <has_text text="NM_174914" />
676 <has_text text="NM_018953" />
677 <has_text text="NR_003084" />
678 <has_text text="NM_017410" />
679 <has_text text="NM_153693" />
680 <has_text text="NR_031764" />
681 <has_n_columns n="5" />
682 </assert_contents>
683 </output>
<!-- output_gene_quant: expected text, the names "baz"/"bar", the values
     2283/1640 and 5 columns. -->
684 <output name="output_gene_quant">
685 <assert_contents>
686 <has_text text="EffectiveLength" />
687 <has_text text="TPM" />
688 <has_text text="baz" />
689 <has_text text="bar" />
690 <has_text text="2283" />
691 <has_text text="1640" />
692 <has_n_columns n="5" />
693 </assert_contents>
694 </output>
695 </test>
696 <test>
<!-- Two separate mate datasets (input_mate1 / input_mate2) with the transcript
     FASTA taken from the history, a gene map, and useEM set to True. The
     assertions are the same as in the tests that leave this parameter
     unset. -->
697 <param name="single_or_paired_opts" value="paired" />
698 <param name="input_mate1" value="reads_1.fastq" />
699 <param name="input_mate2" value="reads_2.fastq" />
700 <param name="TranscriptSource" value="history" />
701 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
702 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
703 <param name="useEM" value="True" />
<!-- output_quant: expected text, transcript identifiers and 5 columns. -->
704 <output name="output_quant">
705 <assert_contents>
706 <has_text text="EffectiveLength" />
707 <has_text text="TPM" />
708 <has_text text="NM_001168316" />
709 <has_text text="NM_174914" />
710 <has_text text="NM_018953" />
711 <has_text text="NR_003084" />
712 <has_text text="NM_017410" />
713 <has_text text="NM_153693" />
714 <has_text text="NR_031764" />
715 <has_n_columns n="5" />
716 </assert_contents>
717 </output>
<!-- output_gene_quant: expected text, the names "baz"/"bar", the values
     2283/1640 and 5 columns. -->
718 <output name="output_gene_quant">
719 <assert_contents>
720 <has_text text="EffectiveLength" />
721 <has_text text="TPM" />
722 <has_text text="baz" />
723 <has_text text="bar" />
724 <has_text text="2283" />
725 <has_text text="1640" />
726 <has_n_columns n="5" />
727 </assert_contents>
728 </output>
729 </test>
730 <test>
<!-- Paired collection input (forward/reverse fastqsanger elements) with the
     transcript FASTA taken from the history, a gene map, and useEM set to
     True. This is the collection counterpart of the separate-mates useEM
     test. -->
731 <param name="single_or_paired_opts" value="paired_collection" />
732 <param name="input_1">
733 <collection type="paired">
734 <element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
735 <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
736 </collection>
737 </param>
738 <param name="TranscriptSource" value="history" />
739 <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
740 <param name="geneMap" value="gene_map.tab" ftype="tabular" />
741 <param name="useEM" value="True" />
<!-- output_quant: expected text, transcript identifiers and 5 columns. -->
742 <output name="output_quant">
743 <assert_contents>
744 <has_text text="EffectiveLength" />
745 <has_text text="TPM" />
746 <has_text text="NM_001168316" />
747 <has_text text="NM_174914" />
748 <has_text text="NM_018953" />
749 <has_text text="NR_003084" />
750 <has_text text="NM_017410" />
751 <has_text text="NM_153693" />
752 <has_text text="NR_031764" />
753 <has_n_columns n="5" />
754 </assert_contents>
755 </output>
<!-- output_gene_quant: expected text, the names "baz"/"bar", the values
     2283/1640 and 5 columns. -->
756 <output name="output_gene_quant">
757 <assert_contents>
758 <has_text text="EffectiveLength" />
759 <has_text text="TPM" />
760 <has_text text="baz" />
761 <has_text text="bar" />
762 <has_text text="2283" />
763 <has_text text="1640" />
764 <has_n_columns n="5" />
765 </assert_contents>
766 </output>
767 </test>
768 </tests>
769
770 <help><![CDATA[
771 **What it does**
772 salmon is a tool for transcript quantification from RNA-seq data. It
773 requires a set of target transcripts (either from a reference or de-novo
774 assembly) to quantify. All you need to run Salmon is a fasta file containing
775 your reference transcripts and a (set of) fasta/fastq file(s) containing your
776 reads. Salmon runs in two phases: indexing and quantification. The indexing
777 step is independent of the reads, and only needs to be run once for a particular
778 set of reference transcripts and choice of k (the k-mer size). The
779 quantification step, obviously, is specific to the set of RNA-seq reads and is
780 thus run more frequently.
781 The quantification output contains a number of columns:
782 (1) Transcript ID,
783 (2) Transcript Length, (3) Effective Length (the transcript length adjusted for the factors modeled by Salmon, such as the fragment length distribution),
784 (4) Transcripts per Million (TPM) and
785 (5) Estimated number of reads (an estimate of the number of reads drawn from this transcript given the transcript’s relative abundance and length).
786 The first two columns are self-explanatory, the TPM is a measure of transcript abundance and the final column is a commonly used input for differential expression tools.
787 The Transcripts per Million quantification number is computed as described in [1], and is meant as an estimate of the number of transcripts, per million observed transcripts,
788 originating from each isoform. Its benefit over the F/RPKM measure is that it is independent of the mean expressed transcript length
789 (i.e. if the mean expressed transcript length varies between samples, for example, this alone can affect differential analysis based on the F/RPKM).
790
791
792 Fragment Library Types
793 ======================
794
795 There are numerous library preparation protocols for RNA-seq that result in
796 sequencing reads with different characteristics. For example, reads can be
797 single end (only one side of a fragment is recorded as a read) or paired-end
798 (reads are generated from both ends of a fragment). Further, the sequencing
799 reads themselves may be unstranded or strand-specific. Finally, paired-end
800 protocols will have a specified relative orientation. To characterize the
801 various different types of sequencing libraries, we've created a miniature
802 "language" that allows for the succinct description of the many different types
803 of possible fragment libraries. For paired-end reads, the possible
804 orientations, along with a graphical description of what they mean, are
805 illustrated below:
806 .. image:: ReadLibraryIllustration.png
807 The library type string consists of three parts: the relative orientation of
808 the reads, the strandedness of the library, and the directionality of the
809 reads.
810 The first part of the library string (relative orientation) is only provided if
811 the library is paired-end. The possible options are:
812 ::
813
814 I = inward
815 O = outward
816 M = matching
817
818 The second part of the read library string specifies whether the protocol is
819 stranded or unstranded; the options are:
820 ::
821
822 S = stranded
823 U = unstranded
824
825 If the protocol is unstranded, then we're done. The final part of the library
826 string specifies the strand from which the read originates in a strand-specific
827 protocol — it is only provided if the library is stranded (i.e. if the
828 library format string is of the form S). The possible values are:
829 ::
830
831 F = read 1 (or single-end read) comes from the forward strand
832 R = read 1 (or single-end read) comes from the reverse strand
833
834 So, for example, if you wanted to specify a fragment library of strand-specific
835 paired-end reads, oriented toward each other, where read 1 comes from the
836 forward strand and read 2 comes from the reverse strand, you would specify ``-l
837 ISF`` on the command line. This designates that the library being processed has
838 the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranded
839 (the protocol is strand-specific), **F**\ orward (read 1 comes from the forward
840 strand).
841 The single-end library strings are a bit simpler than their paired-end
842 counterparts, since there is no relative orientation of which to speak. Thus, the
843 only possible library format types for single-end reads are ``U`` (for
844 unstranded), ``SF`` (for strand-specific reads coming from the forward strand)
845 and ``SR`` (for strand-specific reads coming from the reverse strand).
846 A few more examples of some library format strings and their interpretations are:
847 ::
848
849 IU (an unstranded paired-end library where the reads face each other)
850
851 ::
852
853 SF (a stranded single-end protocol where the reads come from the forward strand)
854
855 ::
856
857 OSR (a stranded paired-end protocol where the reads face away from each other,
858 read1 comes from reverse strand and read2 comes from the forward strand)
859
860 .. note:: Correspondence to TopHat library types
861
862 The popular `TopHat <http://ccb.jhu.edu/software/tophat/index.shtml>`_ RNA-seq
863 read aligner has a different convention for specifying the format of the library.
864 Below is a table that provides the corresponding Salmon/Sailfish library format
865 string for each of the potential TopHat library types:
866
867 +---------------------+-------------------------+
868 | TopHat | Salmon (and Sailfish) |
869 +=====================+============+============+
870 | | Paired-end | Single-end |
871 +---------------------+------------+------------+
872 |``-fr-unstranded`` |``-l IU`` |``-l U`` |
873 +---------------------+------------+------------+
874 |``-fr-firststrand`` |``-l ISR`` |``-l SR`` |
875 +---------------------+------------+------------+
876 |``-fr-secondstrand`` |``-l ISF`` |``-l SF`` |
877 +---------------------+------------+------------+
878
879 The remaining salmon library format strings are not directly expressible in terms
880 of the TopHat library types, and so there is no direct mapping for them.
881 ]]> </help>
882 <citations>
883 <citation type="doi">10.1101/021592</citation>
884 </citations>
885 </tool>