Mercurial > repos > bgruening > salmon
comparison salmon.xml @ 0:91f3a2147127 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit d7607abc9feea09f0f8a227ead4da09323e167bb
author | bgruening |
---|---|
date | Tue, 15 Nov 2016 11:36:41 -0500 |
parents | |
children | c1d822f84e1a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:91f3a2147127 |
---|---|
1 <tool id="salmon" name="Salmon" version="0.7.2"> | |
2 | |
3 <description>Transcript Quantification from RNA-seq data</description> | |
4 | |
5 <macros> | |
6 <xml name="strandedness"> <!-- Reusable strandedness selector; expanded in both the single-end and the paired-end branch of the inputs --> | |
7 <param name="strandedness" type="select" label="Specify the strandedness of the reads"> | |
8 <option value="U" selected="True">Not stranded (U)</option> | |
9 <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option> | |
10 <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option> | |
11 </param> | |
12 </xml> | |
13 </macros> | |
14 | |
15 <requirements> | |
16 <requirement type="package" version="0.7.2">salmon</requirement> <!-- same version string as the tool's own version attribute --> | |
17 </requirements> | |
18 | |
19 <stdio> <!-- Failure detection: any non-zero exit code, or tool output matching one of the patterns below (no level attribute is given, so Galaxy's default applies) --> | |
20 <exit_code range="1:" /> <!-- exit codes 1 and above --> | |
21 <exit_code range=":-1" /> <!-- exit codes -1 and below --> | |
22 <regex match="Error:" /> | |
23 <regex match="Exception:" /> | |
24 <regex match="Exception :" /> | |
25 </stdio> | |
26 <version_command>salmon --version</version_command> <!-- uses the documented long option; the single-dash "-version" spelling is not a documented salmon flag --> | |
27 <command><![CDATA[ | |
28 ## Create the working directories for the (optional) on-the-fly index and for the quantification output. | |
29 mkdir ./index && | |
30 mkdir ./output && | |
31 ## Build an index only for a FASTA from the history. The trailing && lives inside this branch so that | |
32 ## the built-in index branch, which emits no command, does not leave a dangling "&& &&" behind. | |
33 #if $refTranscriptSource.TranscriptSource == "history": | |
34 salmon index | |
35 --transcripts '$refTranscriptSource.ownFile' | |
36 --kmerLen $refTranscriptSource.kmerLen | |
37 --threads "\${GALAXY_SLOTS:-4}" | |
38 --index './index' | |
39 --type '$quasi_orphans.type' | |
40 $perfectHash | |
41 #if str($sasamp): | |
42 --sasamp $sasamp | |
43 #end if | |
44 && | |
45 #set $index_path = './index' | |
46 #else: | |
47 #set $index_path = $refTranscriptSource.index.fields.path | |
48 #end if | |
49 ## Symlink the reads under fixed names with a .fasta or .fastq extension derived from the dataset type. | |
50 #if $single_or_paired.single_or_paired_opts == 'single': | |
51 #set $ext = 'fasta' if $single_or_paired.input_singles.ext == 'fasta' else 'fastq' | |
52 ln -s '$single_or_paired.input_singles' ./single.$ext && | |
53 #else: | |
54 #set $ext = 'fasta' if $single_or_paired.input_mate1.ext == 'fasta' else 'fastq' | |
55 ln -s '$single_or_paired.input_mate1' ./mate1.$ext && | |
56 ln -s '$single_or_paired.input_mate2' ./mate2.$ext && | |
57 #end if | |
58 ## The optional transcript-to-gene map is linked with its own extension and handed to quant further down. | |
59 #if $geneMap: | |
60 ln -s "$geneMap" ./geneMap.${geneMap.ext} && | |
61 #end if | |
62 ## Quantify against the freshly built index or the selected built-in one. | |
63 salmon quant | |
64 --index '$index_path' | |
65 #if $single_or_paired.single_or_paired_opts == 'single': | |
66 --libType ${single_or_paired.strandedness} | |
67 --unmatedReads ./single.$ext | |
68 #else: | |
69 --mates1 ./mate1.$ext | |
70 --mates2 ./mate2.$ext | |
71 --libType "${single_or_paired.orientation}${single_or_paired.strandedness}" | |
72 #end if | |
73 --output ./output | |
74 ## A quasi index always allows orphans; for an fmd index the user's checkbox decides. | |
75 #if str($quasi_orphans.type) == 'quasi': | |
76 --allowOrphans | |
77 #else: | |
78 $quasi_orphans.allowOrphans | |
79 #end if | |
80 $seqBias | |
81 $gcBias | |
82 --threads "\${GALAXY_SLOTS:-4}" | |
83 ## Optional numeric parameters are emitted only when non-empty, so a cleared field never leaves a flag without a value. | |
84 #if str($adv.incompatPrior): | |
85 --incompatPrior $adv.incompatPrior | |
86 #end if | |
87 $adv.consistentHits | |
88 $adv.dumpEq | |
89 #if str($adv.gcSizeSamp): | |
90 --gcSizeSamp $adv.gcSizeSamp | |
91 #end if | |
92 #if str($adv.biasSpeedSamp): | |
93 --biasSpeedSamp $adv.biasSpeedSamp | |
94 #end if | |
95 $adv.strictIntersect | |
96 #if str($adv.fldMax): | |
97 --fldMax $adv.fldMax | |
98 #end if | |
99 #if str($adv.fldMean): | |
100 --fldMean $adv.fldMean | |
101 #end if | |
102 #if str($adv.fldSD): | |
103 --fldSD $adv.fldSD | |
104 #end if | |
105 #if str($adv.forgettingFactor): | |
106 --forgettingFactor $adv.forgettingFactor | |
107 #end if | |
108 $adv.writeMappings | |
109 #if str($adv.maxOcc): | |
110 --maxOcc $adv.maxOcc | |
111 #end if | |
112 $adv.initUniform | |
113 $adv.noFragLengthDist | |
114 $adv.noBiasLengthThreshold | |
115 #if str($adv.maxReadOcc): | |
116 --maxReadOcc $adv.maxReadOcc | |
117 #end if | |
118 #if $geneMap: | |
119 --geneMap ./geneMap.${geneMap.ext} | |
120 #end if | |
121 $adv.noEffectiveLengthCorrection | |
122 $adv.useVBOpt | |
123 #if str($adv.numBiasSamples): | |
124 --numBiasSamples $adv.numBiasSamples | |
125 #end if | |
126 #if str($adv.numAuxModelSamples): | |
127 --numAuxModelSamples $adv.numAuxModelSamples | |
128 #end if | |
129 #if str($adv.numPreAuxModelSamples): | |
130 --numPreAuxModelSamples $adv.numPreAuxModelSamples | |
131 #end if | |
132 #if str($adv.numGibbsSamples): | |
133 --numGibbsSamples $adv.numGibbsSamples | |
134 #end if | |
135 #if str($adv.numBootstraps): | |
136 --numBootstraps $adv.numBootstraps | |
137 #end if | |
138 $adv.perTranscriptPrior | |
139 #if str($adv.vbPrior): | |
140 --vbPrior $adv.vbPrior | |
141 #end if | |
142 $adv.writeUnmappedNames | |
143 ]]> | |
144 </command> | |
145 | |
146 <inputs> | |
147 <conditional name="refTranscriptSource"> <!-- Where the transcriptome comes from: a pre-built index from the salmon_indexes data table, or a FASTA from the history that is indexed on the fly --> | |
148 <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options"> | |
149 <option value="indexed">Use a built-in index</option> | |
150 <option value="history" selected="True">Use one from the history</option> | |
151 </param> | |
152 <when value="indexed"> | |
153 <param name="index" type="select" label="Select a reference transcriptome" help="If your transcriptome of interest is not listed, contact your Galaxy admin"> | |
154 <options from_data_table="salmon_indexes"> | |
155 <filter type="sort_by" column="2"/> | |
156 <validator type="no_options" message="No built-in indexes are available"/> | |
157 </options> | |
158 </param> | |
159 </when> <!-- built-in --> | |
160 <when value="history"> | |
161 <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" /> | |
162 <param argument="--kmerLen" type="integer" value="31" label="k-mer length" help="The size of k-mers used to build the index. The size should be an odd number."/> | |
163 </when> <!-- history --> | |
164 </conditional> | |
165 <conditional name="single_or_paired"> <!-- Read layout; both branches expand the shared strandedness macro, the paired branch adds the relative orientation --> | |
166 <param name="single_or_paired_opts" type="select" label="Is this library single-end or paired-end?"> | |
167 <option value="single">Single-end</option> | |
168 <option value="paired">Paired-end</option> | |
169 </param> | |
170 <when value="single"> | |
171 <param name="input_singles" type="data" format="fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA file." /> | |
172 <expand macro="strandedness" /> | |
173 </when> | |
174 <when value="paired"> | |
175 <param name="input_mate1" type="data" format="fastq,fasta" label="Mate pair 1" help="FASTQ or FASTA file." /> | |
176 <param name="input_mate2" type="data" format="fastq,fasta" label="Mate pair 2" help="FASTQ or FASTA file." /> | |
177 <param name="orientation" type="select" label="Relative orientation of reads within a pair"> | |
178 <option value="M">Mates are oriented in the same direction (M = matching)</option> | |
179 <option value="O">Mates are oriented away from each other (O = outward)</option> | |
180 <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option> | |
181 </param> | |
182 <expand macro="strandedness" /> | |
183 </when> | |
184 </conditional> | |
185 <conditional name="quasi_orphans"> <!-- Index type; only the fmd branch exposes the allowOrphans switch, the quasi branch has no extra inputs --> | |
186 <param argument="--type" type="select" label="Type of index" help="When using quasi, orphaned reads will be considered when performing lightweight-alignment."> | |
187 <option value="quasi" selected="True">quasi</option> | |
188 <option value="fmd">fmd</option> | |
189 </param> | |
190 <when value="quasi"> | |
191 </when> <!-- quasi --> | |
192 <when value="fmd"> | |
193 <param argument="--allowOrphans" type="boolean" truevalue="--allowOrphans" falsevalue="" checked="True" | |
194 label="Consider orphaned reads as valid hits when performing lightweight-alignment" | |
195 help="This option will increase sensitivity (allow more reads to map and more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely to be spurious."/> | |
196 </when> <!-- fmd --> | |
197 </conditional> | |
198 <param argument="--perfectHash" type="boolean" truevalue="--perfectHash" falsevalue="" checked="False" | |
199 label="Perfect Hash" | |
200 help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), but will take longer to construct."/> <!-- index-building option --> | |
201 <param argument="--sasamp" type="integer" value="1" optional="True" label="Suffix Array" | |
202 help="The interval at which the suffix array should be sampled. Smaller values are faster, but produce a larger index. The default should be OK, unless your transcriptome is huge. This value should be a power of 2."/> <!-- index-building option --> | |
203 <param argument="--seqBias" type="boolean" truevalue="--seqBias" falsevalue="" checked="False" | |
204 label="Perform sequence-specific bias correction" | |
205 help=""/> | |
206 <param argument="--gcBias" type="boolean" truevalue="--gcBias" falsevalue="" checked="False" | |
207 label="Perform fragment GC bias correction" | |
208 help=""/> | |
209 <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True" label="File containing a mapping of transcripts to genes" | |
210 help="If this file is provided Salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab." /> | |
211 <section name="adv" title="Additional Options"> <!-- Advanced quantification options; the command template reads them as $adv.NAME --> | |
212 <param argument="--writeMappings" type="boolean" truevalue="--writeMappings" falsevalue="" checked="False" | |
213 label="Write Mappings" | |
214 help="If this option is set, the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead." /> | |
215 <param argument="--incompatPrior" type="float" optional="True" value="9.9999999999999995e-21" | |
216 label="Incompatible Prior" | |
217 help="This option sets the prior probability that an alignment that disagrees with the specified library type (--libType) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be 'impossible', while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do" /> | |
218 <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False" | |
219 label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/> | |
220 <param argument="--gcSizeSamp" type="integer" value="1" optional="True" | |
221 label="The value by which to down-sample transcripts when representing the GC content" help="Larger values will reduce memory usage, but may decrease the fidelity of bias modeling results."/> | |
222 <param argument="--biasSpeedSamp" type="integer" value="1" optional="True" | |
223 label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/> | |
224 <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False" | |
225 label="Modifies how orphans are assigned." help="When this flag is set, if the intersection of the quasi-mappings for the left and right is empty, then all mappings for the left and all mappings for the right read are reported as orphaned quasi-mappings."/> | |
226 <param argument="--minLen" type="integer" value="19" optional="True" | |
227 label="(S)MEMs smaller than this size won't be considered." help="" /> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
228 <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="False" | |
229 label="Perform sensitive quantification" | |
230 help="Setting this option enables the splitting of SMEMs that are larger than 1.5 times the minimum seed length (minLen/k above). This may reveal high scoring chains of MEMs that are masked by long SMEMs. However, this option makes lightweight-alignment a bit slower and is usually not necessary if the reference is of reasonable quality." /> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
231 <param argument="--consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False" | |
232 label="Force hits gathered during quasi-mapping to be consistent" | |
233 help="" /> | |
234 <param argument="--extraSensitive" type="boolean" truevalue="--extraSensitive" falsevalue="" checked="False" | |
235 label="Perform extra sensitive quantification" | |
236 help="Setting this option enables an extra pass of 'seed' search. Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will typically slow down quantification by ~40%. Consider enabling this option if you find the mapping rate to be significantly lower than expected."/> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
237 <param argument="--coverage" type="float" value="0.69999999999999996" optional="True" | |
238 label="Required coverage of read by union of SMEMs to consider it a hit" | |
239 help="" /> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
240 <param argument="--fldMax" type="integer" value="1000" optional="True" | |
241 label="The maximum fragment length to consider when building the empirical distribution." | |
242 help=""/> | |
243 <param argument="--fldMean" type="integer" value="200" optional="True" | |
244 label="The mean used in the fragment length distribution prior" | |
245 help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/> | |
246 <param argument="--fldSD" type="integer" value="80" optional="True" | |
247 label="Standard deviation" | |
248 help="The standard deviation used in the fragment length distribution prior."/> | |
249 <param argument="--forgettingFactor" type="float" value="0.65000000000000002" optional="True" | |
250 label="The forgetting factor used in the online learning schedule." | |
251 help="A smaller value results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0]." /> | |
252 <param argument="--maxOcc" type="integer" value="200" optional="True" | |
253 label="(S)MEMs occurring more than this many times won't be considered" | |
254 help=""/> | |
255 <param argument="--initUniform" type="boolean" truevalue="--initUniform" falsevalue="" checked="False" | |
256 label="Initialization with uniform parameters" | |
257 help="Initialize the offline inference with uniform parameters, rather than seeding with online parameters." /> | |
258 <param argument="--maxReadOcc" type="integer" value="100" optional="True" | |
259 label="Maximal read mapping occurrence" | |
260 help="Reads mapping to more than this many places won't be considered."/> | |
261 <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False" | |
262 label="Disable effective length correction" | |
263 help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/> | |
264 <param argument="--noFragLengthDist" type="boolean" truevalue="--noFragLengthDist" falsevalue="" checked="False" | |
265 label="Ignore fragment length distribution" | |
266 help="[experimental] : Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability." /> | |
267 <param argument="--noBiasLengthThreshold" type="boolean" truevalue="--noBiasLengthThreshold" falsevalue="" checked="False" | |
268 label="[experimental] : If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths." | |
269 help="This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold." /> | |
270 <param argument="--numBiasSamples" type="integer" value="2000000" optional="True" | |
271 label="Number of fragment mappings to use when learning the sequence-specific bias model." | |
272 help="" /> | |
273 <param argument="--numAuxModelSamples" type="integer" value="5000000" optional="True" | |
274 label="The first numAuxModelSamples are used to train the auxiliary model parameters." | |
275 help="(e.g. fragment length distribution, bias, etc.). After the first numAuxModelSamples observations the auxiliary model parameters will be assumed to have converged and will be fixed." /> | |
276 <param argument="--numPreAuxModelSamples" type="integer" value="1000000" optional="True" | |
277 label="The first numPreAuxModelSamples will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models." | |
278 help="The purpose of ignoring the auxiliary models for the first numPreAuxModelSamples observations is to avoid applying these models before their parameters have been learned sufficiently well." /> | |
279 <param argument="--splitWidth" type="integer" value="0" optional="True" | |
280 label="If (S)MEM occurs fewer than this many times, search for smaller, contained MEMs" | |
281 help="The default value will not split (S)MEMs, a higher value will result in more MEMs being explored and, thus, will result in increased running time." /> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
282 <param argument="--splitSpanningSeeds" type="boolean" truevalue="--splitSpanningSeeds" falsevalue="" checked="False" | |
283 label="Attempt to split seeds that happen to fall on the boundary between two transcripts." | |
284 help="This can improve the fragment hit-rate, but is usually not necessary."/> <!-- NOTE(review): this parameter is not referenced in the command template --> | |
285 <param argument="--useVBOpt" type="boolean" truevalue="--useVBOpt" falsevalue="" checked="False" | |
286 label="Use the Variational Bayesian EM rather than the traditional EM algorithm for optimization in the batch passes." | |
287 help=""/> | |
288 <param argument="--numGibbsSamples" type="integer" value="0" optional="True" | |
289 label="Number of Gibbs sampling rounds to perform." | |
290 help="" /> | |
291 <param argument="--numBootstraps" type="integer" value="0" optional="True" | |
292 label="Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling." | |
293 help="" /> | |
294 <param argument="--perTranscriptPrior" type="boolean" truevalue="--perTranscriptPrior" falsevalue="" checked="False" | |
295 label="The prior will be interpreted as a transcript-level prior." | |
296 help="either the default or the argument provided via --vbPrior" /> | |
297 <param argument="--vbPrior" type="float" value="0.001" optional="True" | |
298 label="The prior that will be used in the VBEM algorithm." | |
299 help="This is interpreted as a per-nucleotide prior, unless the --perTranscriptPrior flag is also given, in which case this is used as a transcript-level prior." /> | |
300 <param argument="--writeUnmappedNames" type="boolean" truevalue="--writeUnmappedNames" falsevalue="" checked="False" | |
301 label="Write the names of un-mapped reads to the file unmapped_names.txt." | |
302 help=""/> | |
303 </section> | |
304 </inputs> | |
305 | |
306 <outputs> | |
307 <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> <!-- transcript-level table collected from output/quant.sf --> | |
308 <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)"> <!-- gene-level table; the filter keeps it only when a geneMap dataset was supplied --> | |
309 <filter>geneMap</filter> | |
310 </data> | |
311 </outputs> | |
312 | |
313 <tests> | |
314 <test> <!-- Paired-end reads against a FASTA from the history, all other options at their defaults --> | |
315 <param name="single_or_paired_opts" value="paired" /> | |
316 <param name="input_mate1" value="reads_1.fastq" /> | |
317 <param name="input_mate2" value="reads_2.fastq" /> | |
318 <param name="seqBias" value="False" /> <!-- replaces "biasCorrect", which is not an input of this tool; False is the seqBias default --> | |
319 <param name="TranscriptSource" value="history" /> | |
320 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
321 <output name="output_quant"> | |
322 <assert_contents> | |
323 <has_text text="EffectiveLength" /> | |
324 <has_text text="TPM" /> | |
325 <has_text text="NM_001168316" /> | |
326 <has_text text="NM_174914" /> | |
327 <has_text text="NM_018953" /> | |
328 <has_text text="NR_003084" /> | |
329 <has_text text="NM_017410" /> | |
330 <has_text text="NM_153693" /> | |
331 <has_text text="NR_031764" /> | |
332 <has_n_columns n="5" /> | |
333 </assert_contents> | |
334 </output> | |
335 </test> | |
336 <test> <!-- Paired-end run with a transcript-to-gene map; checks both the transcript-level and the gene-level table --> | |
337 <param name="single_or_paired_opts" value="paired" /> | |
338 <param name="input_mate1" value="reads_1.fastq" /> | |
339 <param name="input_mate2" value="reads_2.fastq" /> | |
340 <param name="TranscriptSource" value="history" /> | |
341 <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> | |
342 <param name="geneMap" value="gene_map.tab" ftype="tabular" /> | |
343 <output name="output_quant"> | |
344 <assert_contents> | |
345 <has_text text="EffectiveLength" /> | |
346 <has_text text="TPM" /> | |
347 <has_text text="NM_001168316" /> | |
348 <has_text text="NM_174914" /> | |
349 <has_text text="NM_018953" /> | |
350 <has_text text="NR_003084" /> | |
351 <has_text text="NM_017410" /> | |
352 <has_text text="NM_153693" /> | |
353 <has_text text="NR_031764" /> | |
354 <has_n_columns n="5" /> | |
355 </assert_contents> | |
356 </output> | |
357 <output name="output_gene_quant"> <!-- only produced because geneMap is set above --> | |
358 <assert_contents> | |
359 <has_text text="EffectiveLength" /> | |
360 <has_text text="TPM" /> | |
361 <has_text text="baz" /> | |
362 <has_text text="bar" /> | |
363 <has_text text="2283" /> | |
364 <has_text text="1640" /> | |
365 <has_n_columns n="5" /> | |
366 </assert_contents> | |
367 </output> | |
368 </test> | |
369 </tests> | |
370 | |
371 <help><![CDATA[ | |
372 **What it does** | |
373 salmon is a tool for transcript quantification from RNA-seq data. It | |
374 requires a set of target transcripts (either from a reference or de-novo | |
375 assembly) to quantify. All you need to run Salmon is a fasta file containing | |
376 your reference transcripts and a (set of) fasta/fastq file(s) containing your | |
377 reads. Salmon runs in two phases; indexing and quantification. The indexing | |
378 step is independent of the reads, and only needs to be run once for a particular | |
379 set of reference transcripts and choice of k (the k-mer size). The | |
380 quantification step, obviously, is specific to the set of RNA-seq reads and is | |
381 thus run more frequently. | |
382 The quantification output contains a number of columns: | |
383 (1) Transcript ID, | |
384 (2) Transcript Length, | |
385 (3) Transcripts per Million (TPM) and | |
386 (4) Estimated number of reads (an estimate of the number of reads drawn from this transcript given the transcript’s relative abundance and length). | |
387 The first two columns are self-explanatory, the third is a measure of transcript abundance and the final is a commonly used input for differential expression tools. | |
388 The Transcripts per Million quantification number is computed as described in [1], and is meant as an estimate of the number of transcripts, per million observed transcripts, | |
389 originating from each isoform. Its benefit over the F/RPKM measure is that it is independent of the mean expressed transcript length | |
390 (i.e. if the mean expressed transcript length varies between samples, for example, this alone can affect differential analysis based on the F/RPKM.). | |
391 | |
392 | |
393 Fragment Library Types | |
394 ====================== | |
395 | |
396 There are numerous library preparation protocols for RNA-seq that result in | |
397 sequencing reads with different characteristics. For example, reads can be | |
398 single end (only one side of a fragment is recorded as a read) or paired-end | |
399 (reads are generated from both ends of a fragment). Further, the sequencing | |
400 reads themselves may be unstranded or strand-specific. Finally, paired-end | |
401 protocols will have a specified relative orientation. To characterize the | |
402 various different types of sequencing libraries, we've created a miniature | |
403 "language" that allows for the succinct description of the many different types | |
404 of possible fragment libraries. For paired-end reads, the possible | |
405 orientations, along with a graphical description of what they mean, are | |
406 illustrated below: | |
407 .. image:: ReadLibraryIllustration.png | |
408 The library type string consists of three parts: the relative orientation of | |
409 the reads, the strandedness of the library, and the directionality of the | |
410 reads. | |
411 The first part of the library string (relative orientation) is only provided if | |
412 the library is paired-end. The possible options are: | |
413 :: | |
414 | |
415 I = inward | |
416 O = outward | |
417 M = matching | |
418 | |
419 The second part of the read library string specifies whether the protocol is | |
420 stranded or unstranded; the options are: | |
421 :: | |
422 | |
423 S = stranded | |
424 U = unstranded | |
425 | |
426 If the protocol is unstranded, then we're done. The final part of the library | |
427 string specifies the strand from which the read originates in a strand-specific | |
428 protocol — it is only provided if the library is stranded (i.e. if the | |
429 library format string is of the form S). The possible values are: | |
430 :: | |
431 | |
432 F = read 1 (or single-end read) comes from the forward strand | |
433 R = read 1 (or single-end read) comes from the reverse strand | |
434 | |
435 So, for example, if you wanted to specify a fragment library of strand-specific | |
436 paired-end reads, oriented toward each other, where read 1 comes from the | |
437 forward strand and read 2 comes from the reverse strand, you would specify ``-l | |
438 ISF`` on the command line. This designates that the library being processed has | |
439 the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranded | |
440 (the protocol is strand-specific), **F**\ orward (read 1 comes from the forward | |
441 strand). | |
442 The single-end library strings are a bit simpler than their paired-end | |
443 counterparts, since there is no relative orientation of which to speak. Thus, the | |
444 only possible library format types for single-end reads are ``U`` (for | |
445 unstranded), ``SF`` (for strand-specific reads coming from the forward strand) | |
446 and ``SR`` (for strand-specific reads coming from the reverse strand). | |
447 A few more examples of some library format strings and their interpretations are: | |
448 :: | |
449 | |
450 IU (an unstranded paired-end library where the reads face each other) | |
451 | |
452 :: | |
453 | |
454 SF (a stranded single-end protocol where the reads come from the forward strand) | |
455 | |
456 :: | |
457 | |
458 OSR (a stranded paired-end protocol where the reads face away from each other, | |
459 read1 comes from reverse strand and read2 comes from the forward strand) | |
460 | |
461 .. note:: Correspondence to TopHat library types | |
462 | |
463 The popular `TopHat <http://ccb.jhu.edu/software/tophat/index.shtml>`_ RNA-seq | |
464 read aligner has a different convention for specifying the format of the library. | |
465 Below is a table that provides the corresponding Salmon library format | |
466 string for each of the potential TopHat library types: | |
467 | |
468 +---------------------+-------------------------+ | |
469 | TopHat | Salmon (and Sailfish) | | |
470 +=====================+============+============+ | |
471 | | Paired-end | Single-end | | |
472 +---------------------+------------+------------+ | |
473 |``-fr-unstranded`` |``-l IU`` |``-l U`` | | |
474 +---------------------+------------+------------+ | |
475 |``-fr-firststrand`` |``-l ISR`` |``-l SR`` | | |
476 +---------------------+------------+------------+ | |
477 |``-fr-secondstrand`` |``-l ISF`` |``-l SF`` | | |
478 +---------------------+------------+------------+ | |
479 | |
480 The remaining salmon library format strings are not directly expressible in terms | |
481 of the TopHat library types, and so there is no direct mapping for them. | |
482 ]]> </help> | |
483 <citations> <!-- publication to cite when this tool is used, referenced by DOI --> | |
484 <citation type="doi">10.1101/021592</citation> | |
485 </citations> | |
486 </tool> |