salmon: old/salmon.xml comparison

comparison old/salmon.xml @ 11:666bb48b1007 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 02087ce2966cf8b4aac9197a41171e7f986c11d1-dirty"

author	bgruening
date	Wed, 02 Oct 2019 04:31:53 -0400
parents
children

comparison

equal deleted inserted replaced

-:4de6e2e40c7a
+:666bb48b1007
+<tool id="salmon" name="Salmon" version="@VERSION@">
+<description>Transcript Quantification from RNA-seq data</description>
+<macros>
+    <!-- @VERSION@ feeds the tool version and the salmon requirement; @IDX_VERSION@ filters the built-in index table. -->
+    <token name="@VERSION@">0.11.2</token>
+    <token name="@IDX_VERSION@">q5</token>
+    <!-- Strandedness selector; its value forms the second part of the libType string. -->
+    <xml name="strandedness">
+        <param name="strandedness" type="select" label="Specify the strandedness of the reads">
+            <option value="U" selected="True">Not stranded (U)</option>
+            <option value="SF">read 1 (or single-end read) comes from the forward strand (SF)</option>
+            <option value="SR">read 1 (or single-end read) comes from the reverse strand (SR)</option>
+        </param>
+    </xml>
+    <!-- Mate orientation selector for paired layouts; its value forms the first part of the libType string. -->
+    <xml name="orientation">
+        <param name="orientation" type="select" label="Relative orientation of reads within a pair">
+            <option value="M">Mates are oriented in the same direction (M = matching)</option>
+            <option value="O">Mates are oriented away from each other (O = outward)</option>
+            <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option>
+        </param>
+    </xml>
+</macros>
+<requirements>
+    <!-- Helper tools called by the command template: bzcat for bz2 inputs, seqtk to split interleaved reads. -->
+    <requirement type="package" version="1.0.6">bzip2</requirement>
+    <!-- The quantifier itself, pinned to the tool version token. -->
+    <requirement type="package" version="@VERSION@">salmon</requirement>
+    <requirement type="package" version="1.2">seqtk</requirement>
+</requirements>
+<stdio>
+    <!-- Any non-zero exit status fails the job. -->
+    <exit_code range="1:" level="fatal" />
+    <exit_code range=":-1" level="fatal" />
+    <!-- These markers in stdout or stderr also fail the job. -->
+    <regex match="Error:" source="both" level="fatal" />
+    <regex match="Exception:" source="both" level="fatal" />
+    <regex match="Exception :" source="both" level="fatal" />
+</stdio>
+<!-- Records the salmon version for the job; salmon documents the long form of the version flag. -->
+<version_command>salmon --version</version_command>
+<command><![CDATA[
+    ## Working directories: ./index receives a freshly built index, ./output receives the salmon quant results.
+    mkdir ./index
+    &&
+    mkdir ./output
+    ## Either build an index from a history FASTA or use the path of a built-in index.
+    #if $refTranscriptSource.TranscriptSource == "history":
+        &&
+        salmon index
+            --transcripts '$refTranscriptSource.ownFile'
+            --kmerLen $refTranscriptSource.kmerLen
+            --threads "\${GALAXY_SLOTS:-4}"
+            --index './index'
+            --type '$quasi_orphans.type'
+            $perfectHash
+        #set $index_path = './index'
+    #else:
+        #set $index_path = $refTranscriptSource.index.fields.path
+    #end if
+    &&
+    ## The first read dataset of the chosen layout decides the link extension and
+    ## whether the reads have to be decompressed on the fly.
+    #set $layout = str($single_or_paired.single_or_paired_opts)
+    #if $layout == 'single':
+        #set $first_reads = $single_or_paired.input_singles
+    #else if $layout == 'paired':
+        #set $first_reads = $single_or_paired.input_mate1
+    #else if $layout == 'paired_collection':
+        #set $first_reads = $single_or_paired.input_1.forward
+    #else if $layout == 'paired_interleaved':
+        #set $first_reads = $single_or_paired.input_1
+    #end if
+    #set $compressed = 'no'
+    #if $first_reads.ext == 'fasta':
+        #set $ext = 'fasta'
+    #else:
+        #if $first_reads.is_of_type("fastq.gz", "fastqsanger.gz"):
+            #set $compressed = 'GZ'
+        #else if $first_reads.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+            #set $compressed = 'BZ2'
+        #end if
+        #set $ext = 'fastq'
+    #end if
+    ## Link the inputs under predictable names; dataset paths are quoted so unusual characters cannot split arguments.
+    #if $layout == 'single':
+        ln -s '$single_or_paired.input_singles' './single.${ext}' &&
+    #else if $layout == 'paired':
+        ln -s '$single_or_paired.input_mate1' './mate1.${ext}' &&
+        ln -s '$single_or_paired.input_mate2' './mate2.${ext}' &&
+    #else if $layout == 'paired_collection':
+        ln -s '${single_or_paired.input_1.forward}' './mate1.${ext}' &&
+        ln -s '${single_or_paired.input_1.reverse}' './mate2.${ext}' &&
+    #else if $layout == 'paired_interleaved':
+        ln -s '$single_or_paired.input_1' './mate1.${ext}' &&
+    #end if
+    #if $geneMap:
+        ln -s '$geneMap' './geneMap.${geneMap.ext}' &&
+    #end if
+    salmon quant
+        --index '$index_path'
+    #if $layout == 'single':
+        --libType '${single_or_paired.strandedness}'
+        #if $compressed == 'GZ':
+            --unmatedReads <(zcat < ./single.$ext)
+        #else if $compressed == 'BZ2':
+            --unmatedReads <(bzcat < ./single.$ext)
+        #else:
+            --unmatedReads ./single.$ext
+        #end if
+    #else:
+        --libType "${single_or_paired.orientation}${single_or_paired.strandedness}"
+        #if $layout == 'paired_interleaved':
+            ## seqtk splits the interleaved stream into mates; only bz2 input is piped through bzcat first.
+            #if $compressed == 'BZ2':
+                --mates1 <(bzcat < ./mate1.$ext | seqtk seq -1)
+                --mates2 <(bzcat < ./mate1.$ext | seqtk seq -2)
+            #else:
+                --mates1 <(seqtk seq -1 ./mate1.$ext)
+                --mates2 <(seqtk seq -2 ./mate1.$ext)
+            #end if
+        #else:
+            #if $compressed == 'GZ':
+                --mates1 <(zcat < ./mate1.$ext)
+                --mates2 <(zcat < ./mate2.$ext)
+            #else if $compressed == 'BZ2':
+                --mates1 <(bzcat < ./mate1.$ext)
+                --mates2 <(bzcat < ./mate2.$ext)
+            #else:
+                --mates1 ./mate1.$ext
+                --mates2 ./mate2.$ext
+            #end if
+        #end if
+    #end if
+        --output ./output
+    #if str($quasi_orphans.type) == 'quasi':
+        --allowOrphans
+        $quasi_orphans.validateMappings
+        --ma $quasi_orphans.matchScore
+        --mp $quasi_orphans.mismatchPenalty
+        --go $quasi_orphans.gapOpenPenalty
+        --ge $quasi_orphans.gapExtensionPenalty
+        --minScoreFraction $quasi_orphans.minScoreFraction
+    #end if
+        $seqBias
+        $gcBias
+        $noErrorModel
+        --threads "\${GALAXY_SLOTS:-4}"
+    ## Optional numeric parameters are only emitted when a value was entered, so an
+    ## empty field can never leave a flag without its argument.
+    #if str($adv.incompatPrior):
+        --incompatPrior $adv.incompatPrior
+    #end if
+        $adv.consistentHits
+        $adv.dumpEq
+        $adv.reduceGCMemory
+    #if str($adv.biasSpeedSamp):
+        --biasSpeedSamp $adv.biasSpeedSamp
+    #end if
+        $adv.strictIntersect
+    #if str($adv.fldMax):
+        --fldMax $adv.fldMax
+    #end if
+    #if str($adv.fldMean):
+        --fldMean $adv.fldMean
+    #end if
+    #if str($adv.fldSD):
+        --fldSD $adv.fldSD
+    #end if
+    #if str($adv.forgettingFactor):
+        --forgettingFactor $adv.forgettingFactor
+    #end if
+        $adv.initUniform
+        $adv.noFragLengthDist
+        $adv.noBiasLengthThreshold
+    #if str($adv.maxReadOcc):
+        --maxReadOcc $adv.maxReadOcc
+    #end if
+    #if $geneMap:
+        --geneMap './geneMap.${geneMap.ext}'
+    #end if
+        $adv.noEffectiveLengthCorrection
+        $adv.useEM
+    #if str($adv.numBiasSamples):
+        --numBiasSamples $adv.numBiasSamples
+    #end if
+    #if str($adv.numAuxModelSamples):
+        --numAuxModelSamples $adv.numAuxModelSamples
+    #end if
+    #if str($adv.numPreAuxModelSamples):
+        --numPreAuxModelSamples $adv.numPreAuxModelSamples
+    #end if
+    #if str($adv.numGibbsSamples):
+        --numGibbsSamples $adv.numGibbsSamples
+    #end if
+    #if str($adv.numBootstraps):
+        --numBootstraps $adv.numBootstraps
+    #end if
+    ## Without an explicit value the slack is 1 when mappings are validated and 0 otherwise.
+    #if str($adv.consensusSlack):
+        --consensusSlack $adv.consensusSlack
+    #else:
+        #if $quasi_orphans.validateMappings:
+            --consensusSlack 1
+        #else:
+            --consensusSlack 0
+        #end if
+    #end if
+        $adv.perTranscriptPrior
+    #if str($adv.vbPrior):
+        --vbPrior $adv.vbPrior
+    #end if
+        $adv.writeUnmappedNames
+        --sigDigits $adv.sigDigits
+    ## Must stay last: the mappings are written to stdout, which is redirected into the SAM output dataset.
+    #if str($adv.writeMappings):
+        $adv.writeMappings > '${output_sam}'
+    #end if
+]]>
+</command>
+<inputs>
+    <!-- Reference transcriptome: a pre-built index from the data table, or a FASTA from the history that is indexed at run time. -->
+    <conditional name="refTranscriptSource">
+        <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history" selected="True">Use one from the history</option>
+        </param>
+        <when value="indexed">
+            <param name="index" type="select" label="Select a reference transcriptome" help="If your transcriptome of interest is not listed, contact your Galaxy admin">
+                <options from_data_table="salmon_indexes_versioned">
+                    <filter type="sort_by" column="2"/>
+                    <filter type="static_value" column="4" value="@IDX_VERSION@" />
+                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                </options>
+            </param>
+        </when>  <!-- built-in -->
+        <when value="history">
+            <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" />
+            <param name="kmerLen" argument="--kmerLen" type="integer" value="31" label="k-mer length" help="The size should be an odd number."/>
+        </when>  <!-- history -->
+    </conditional>
+    <!-- Library layout; every layout accepts plain, gzip- or bzip2-compressed FASTQ as well as FASTA. -->
+    <conditional name="single_or_paired">
+        <param name="single_or_paired_opts" type="select" label="Is this library mate-paired?">
+            <option value="single">Single-end</option>
+            <option value="paired">Paired-end</option>
+            <option value="paired_collection">Paired-end Dataset Collection</option>
+            <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
+        </param>
+        <when value="single">
+            <param name="input_singles" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="FASTQ/FASTA file" help="FASTQ file." />
+            <expand macro="strandedness" />
+        </when>
+        <when value="paired">
+            <param name="input_mate1" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 1" help="FASTQ file." />
+            <param name="input_mate2" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 2" help="FASTQ file." />
+            <expand macro="orientation" />
+            <expand macro="strandedness" />
+        </when>
+        <when value="paired_collection">
+            <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
+            <expand macro="orientation" />
+            <expand macro="strandedness" />
+        </when>
+        <when value="paired_interleaved">
+            <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
+            <expand macro="orientation" />
+            <expand macro="strandedness" />
+        </when>
+    </conditional>
+    <!-- Index type and the alignment scoring options that go with it. -->
+    <conditional name="quasi_orphans">
+        <param argument="--type" type="select" label="Type of index" help="When using quasi, orphaned reads will be considered when performing lightweight-alignment.">
+            <option value="quasi" selected="True">quasi</option>
+        </param>
+        <when value="quasi">
+            <param argument="--validateMappings" type="boolean" truevalue="--validateMappings" falsevalue="" checked="False"
+                label="Validate mappings"
+                help="Validate mappings using alignment-based verification. If this flag is passed, quasi-mappings will be validated to ensure that they could give rise to a reasonable alignment before they are further used for quantification."/>
+            <param name="matchScore" argument="--ma" type="integer" value="2"
+                label="Match Score"
+                help="The value given to a match between read and reference nucleotides in an alignment."/>
+            <param name="mismatchPenalty" argument="--mp" type="integer" value="4"
+                label="Mismatch Penalty"
+                help="The value given to a mis-match between read and reference nucleotides in an alignment. This will be cast to a negative value."/>
+            <param name="gapOpenPenalty" argument="--go" type="integer" value="5"
+                label="Gap Open Penalty"
+                help="The value given to a gap opening in an alignment."/>
+            <param name="gapExtensionPenalty" argument="--ge" type="integer" value="3"
+                label="Gap Extension Penalty"
+                help="The value given to a gap extension in an alignment."/>
+            <!-- The upper bound is 1.0 so that the documented (0,1] range is fully selectable. -->
+            <param argument="--minScoreFraction" type="float" value="0.65" min="0.0" max="1.0"
+                label="Min Score Fraction"
+                help="The fraction of the optimal possible alignment score that a mapping must achieve in order to be considered valid. Should be in (0,1]."/>
+        </when>  <!-- quasi -->
+    </conditional>
+    <param argument="--perfectHash" type="boolean" truevalue="--perfectHash" falsevalue="" checked="False"
+        label="Perfect Hash"
+        help="Build the index using a perfect hash rather than a dense hash.  This will require  less memory (especially during quantification), but will take longer to construct "/>
+    <param argument="--seqBias" type="boolean" truevalue="--seqBias" falsevalue="" checked="False"
+        label="Perform sequence-specific bias correction"
+        help=""/>
+    <param argument="--gcBias" type="boolean" truevalue="--gcBias" falsevalue="" checked="False"
+        label="Perform fragment GC bias correction"
+        help=""/>
+    <param argument="--geneMap" type="data" format="tabular,gff,gtf" optional="True"
+        label="File containing a mapping of transcripts to genes"
+        help="If this file is provided Salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab." />
+    <param argument="--noErrorModel" type="boolean" truevalue="--noErrorModel" falsevalue="" checked="False"
+        label="No Error Model"
+        help="Turn off the alignment error model, which takes into account the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy."/>
+    <section name="adv" title="Additional Options">
+        <!-- NOTE(review): minLen, sensitive, extraSensitive, coverage, splitWidth and splitSpanningSeeds are never referenced by the command template of this tool, so setting them currently has no effect. -->
+        <param argument="--writeMappings" type="boolean" truevalue="--writeMappings" falsevalue="" checked="False"
+            label="Write Mappings"
+            help="If this option is set to 'Yes', then the quasi-mapping results will be written out in SAM-compatible format. By default, output is directed to stdout." />
+        <param argument="--incompatPrior" type="float" optional="True" value="9.9999999999999995e-21"
+            label="Incompatible Prior"
+            help="This option sets the prior probability that an alignment that disagrees with the specified library type (--libType) results from the true fragment origin. Setting this to 0 specifies that alignments that disagree with the library type should be 'impossible', while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do" />
+        <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False"
+            label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/>
+        <param argument="--reduceGCMemory" type="boolean" truevalue="--reduceGCMemory" falsevalue="" optional="True" checked="False"
+            label="If this option is selected, a more memory efficient (but slightly slower) representation is used to compute fragment GC content."
+            help="Enabling this will reduce memory usage, but can also reduce speed.  However, the results themselves will remain the same."/>
+        <param argument="--biasSpeedSamp" type="integer" value="1" optional="True"
+            label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/>
+        <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False"
+            label="Modifies how orphans are assigned." help="When this flag is set, if the intersection of the quasi-mappings for the left and right is empty, then all mappings for the left and all mappings for the right read are reported as orphaned quasi-mappings."/>
+        <param argument="--minLen" type="integer" value="19" optional="True"
+            label=" (S)MEMs smaller than this size won't be considered." help="" />
+        <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="False"
+            label="Perform sensitive quantification"
+            help=" Setting this option enables the splitting of SMEMs that are larger than 1.5 times the minimum seed length (minLen/k above).  This may reveal high scoring chains of MEMs that are masked by long SMEMs.  However, this option makes lightweight-alignment a bit slower and is usually not necessary if the reference is of reasonable quality." />
+        <param argument="--consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False"
+            label="Force hits gathered during quasi-mapping to be consistent"
+            help="" />
+        <param argument="--extraSensitive" type="boolean" truevalue="--extraSensitive" falsevalue="" checked="False"
+            label="Perform extra sensitive quantification"
+            help="Setting this option enables an extra pass of 'seed' search. Enabling this option may improve sensitivity (the number of reads having sufficient coverage), but will typically slow down quantification by ~40%.  Consider enabling this option if you find the mapping rate to be significantly lower than expected."/>
+        <param argument="--coverage" type="float" value="0.69999999999999996" optional="True"
+            label="Required coverage of read by union of SMEMs to consider it a hit"
+            help="" />
+        <param argument="--fldMax" type="integer" value="1000" optional="True"
+            label="The maximum fragment length to consider when building the empirical distribution."
+            help=""/>
+        <param argument="--fldMean" type="integer" value="200" optional="True"
+            label="The mean used in the fragment length distribution prior"
+            help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/>
+        <param argument="--fldSD" type="integer" value="80" optional="True"
+            label="Standard deviation"
+            help="The standard deviation used in the fragment length distribution prior."/>
+        <param argument="--forgettingFactor" type="float" value="0.65000000000000002" optional="True"
+            label="The forgetting factor used in the online learning schedule."
+            help=" A smaller value results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable.  Value should be in the interval (0.5, 1.0]." />
+        <param argument="--initUniform" type="boolean" truevalue="--initUniform" falsevalue="" checked="False"
+            label="Initialization with uniform parameters"
+            help="initialize the offline inference with uniform parameters, rather than seeding with online parameters." />
+        <param argument="--maxReadOcc" type="integer" value="100" optional="True"
+            label="Maximal read mapping occurrence"
+            help="Reads mapping to more than this many places won't be considered."/>
+        <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False"
+            label="Disable effective length correction"
+            help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/>
+        <param argument="--noFragLengthDist" type="boolean" truevalue="--noFragLengthDist" falsevalue="" checked="False"
+            label="Ignore fragment length distribution"
+            help="[experimental] : Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location.  Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability." />
+        <param argument="--noBiasLengthThreshold" type="boolean" truevalue="--noBiasLengthThreshold" falsevalue="" checked="False"
+            label="[experimental] : If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths."
+            help="This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold." />
+        <param argument="--numBiasSamples" type="integer" value="2000000" optional="True"
+            label="Number of fragment mappings to use when learning the sequence-specific bias model."
+            help="" />
+        <param argument="--numAuxModelSamples" type="integer" value="5000000" optional="True"
+            label="The first numAuxModelSamples are used to train the auxiliary model parameters."
+            help="(e.g. fragment length distribution, bias, etc.). After the first numAuxModelSamples observations the auxiliary model parameters will be assumed to have converged and will be fixed." />
+        <param argument="--numPreAuxModelSamples" type="integer" value="1000000" optional="True"
+            label="The first numPreAuxModelSamples will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models."
+            help=" The purpose of ignoring the auxiliary models for the first numPreAuxModelSamples observations is to avoid applying these models before their parameters have been learned sufficiently well." />
+        <param argument="--splitWidth" type="integer" value="0" optional="True"
+            label=" If (S)MEM occurs fewer than this many times, search for smaller, contained  MEMs"
+            help="The default value will not split (S)MEMs, a higher value will result in more MEMs being explored and, thus, will result in increased running time." />
+        <param argument="--splitSpanningSeeds" type="boolean" truevalue="--splitSpanningSeeds" falsevalue="" checked="False"
+            label="Attempt to split seeds that happen to fall on the boundary between two transcripts."
+            help="This can improve the fragment hit-rate, but is usually not necessary."/>
+        <param argument="--useEM" type="boolean" truevalue="--useEM" falsevalue="" checked="False"
+            label="Use the traditional EM algorithm for optimization in the batch passes."
+            help=""/>
+        <param argument="--numGibbsSamples" type="integer" value="0" optional="True"
+            label=" Number of Gibbs sampling rounds to perform."
+            help="" />
+        <param argument="--numBootstraps" type="integer" value="0" optional="True"
+            label="Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling."
+            help="" />
+        <param argument="--perTranscriptPrior" type="boolean" truevalue="--perTranscriptPrior" falsevalue="" checked="False"
+            label="The prior will be interpreted as a transcript-level prior."
+            help="either the default or the argument provided via --vbPrior" />
+        <param argument="--vbPrior" type="float" value="0.001" optional="True"
+            label="The prior that will be used in the VBEM algorithm."
+            help="This is interpreted as a per-nucleotide prior, unless the --perTranscriptPrior flag is also given, in which case this is used as a transcript-level prior." />
+        <param argument="--writeUnmappedNames" type="boolean" truevalue="--writeUnmappedNames" falsevalue="" checked="False"
+            label="Write the names of un-mapped reads to the file unmapped_names.txt."
+            help=""/>
+        <param argument="--sigDigits" type="integer" value="3"
+            label="Significant Digits"
+            help="The number of significant digits to write when outputting the EffectiveLength and NumReads columns."/>
+        <param argument="--consensusSlack" type="integer" optional="True"
+            label="Consensus Slack"
+            help="The amount of slack allowed in the quasi-mapping consensus mechanism.  Normally, a transcript must cover all hits to be considered for mapping. If this is set to a value, X, greater than 0, then a transcript can fail to cover up to X hits before it is discounted as a mapping candidate.  The default value of this option is 1 if --validateMappings is given and 0 otherwise."/>
+    </section>
+</inputs>
+<outputs>
+    <!-- Transcript-level quantification; always produced. -->
+    <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string} (Quantification)" />
+    <!-- Gene-level quantification; only kept when a transcript-to-gene map was supplied. -->
+    <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)">
+        <filter>geneMap</filter>
+    </data>
+    <!-- SAM mappings; only kept when the writeMappings option is switched on. -->
+    <data name="output_sam" format="sam" label="${tool.name} on ${on_string} (SAM format)">
+        <filter>adv['writeMappings']</filter>
+    </data>
+</outputs>
+<tests>
+<test> <!-- paired-end reads, index built from a history FASTA -->
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq" />
+    <param name="input_mate2" value="reads_2.fastq" />
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!--test use of built-in index-->
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq" />
+    <param name="input_mate2" value="reads_2.fastq" />
+    <param name="TranscriptSource" value="indexed" />
+    <param name="index" value="hg19_transcript_subset" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- gzipped input -->
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq.gz" ftype="fastqsanger.gz" />
+    <param name="input_mate2" value="reads_2.fastq.gz" ftype="fastqsanger.gz" />
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- bzipped input -->
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq.bz2" ftype="fastqsanger.bz2" />
+    <param name="input_mate2" value="reads_2.fastq.bz2" ftype="fastqsanger.bz2" />
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- interleaved bz2 input -->
+    <param name="single_or_paired_opts" value="paired_interleaved" />
+    <param name="input_1" value="reads_both.fastq.bz2" ftype="fastqsanger.bz2" />
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- interleaved gz input -->
+    <param name="single_or_paired_opts" value="paired_interleaved" />
+    <param name="input_1" value="reads_both.fastq.gz" ftype="fastqsanger.gz" />
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- paired-end reads with a transcript-to-gene map: both quantification tables are checked -->
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq" />
+    <param name="input_mate2" value="reads_2.fastq" />
+    <param name="geneMap" value="gene_map.tab" ftype="tabular" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+    <output name="output_gene_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="baz" />
+            <has_text text="bar" />
+            <has_text text="2283" />
+            <has_text text="1640" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- paired dataset collection with a transcript-to-gene map -->
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <param name="single_or_paired_opts" value="paired_collection" />
+    <param name="input_1">
+        <collection type="paired">
+            <element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
+            <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
+        </collection>
+    </param>
+    <param name="geneMap" value="gene_map.tab" ftype="tabular" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+    <output name="output_gene_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="baz" />
+            <has_text text="bar" />
+            <has_text text="2283" />
+            <has_text text="1640" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- paired-end reads with a gene map and mapping validation switched on -->
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <param name="single_or_paired_opts" value="paired" />
+    <param name="input_mate1" value="reads_1.fastq" />
+    <param name="input_mate2" value="reads_2.fastq" />
+    <param name="geneMap" value="gene_map.tab" ftype="tabular" />
+    <param name="validateMappings" value="True" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+    <output name="output_gene_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="baz" />
+            <has_text text="bar" />
+            <has_text text="2283" />
+            <has_text text="1640" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- paired dataset collection with a gene map and mapping validation switched on -->
+    <param name="TranscriptSource" value="history" />
+    <param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+    <param name="single_or_paired_opts" value="paired_collection" />
+    <param name="input_1">
+        <collection type="paired">
+            <element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
+            <element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
+        </collection>
+    </param>
+    <param name="geneMap" value="gene_map.tab" ftype="tabular" />
+    <param name="validateMappings" value="True" />
+    <output name="output_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="NM_001168316" />
+            <has_text text="NM_174914" />
+            <has_text text="NM_018953" />
+            <has_text text="NR_003084" />
+            <has_text text="NM_017410" />
+            <has_text text="NM_153693" />
+            <has_text text="NR_031764" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+    <output name="output_gene_quant">
+        <assert_contents>
+            <has_text text="EffectiveLength" />
+            <has_text text="TPM" />
+            <has_text text="baz" />
+            <has_text text="bar" />
+            <has_text text="2283" />
+            <has_text text="1640" />
+            <has_n_columns n="5" />
+        </assert_contents>
+    </output>
+</test>
+<test> <!-- Paired-end run with separate mate files (reads_1/reads_2.fastq); the transcript FASTA is taken from the history and useEM is set to True. -->
+<param name="single_or_paired_opts" value="paired" />
+<param name="input_mate1" value="reads_1.fastq" />
+<param name="input_mate2" value="reads_2.fastq" />
+<param name="TranscriptSource" value="history" />
+<param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+<param name="geneMap" value="gene_map.tab" ftype="tabular" />
+<param name="useEM" value="True" />
+<output name="output_quant"> <!-- Transcript-level table: must contain the EffectiveLength/TPM header names, the listed transcript IDs, and exactly 5 columns. -->
+<assert_contents>
+<has_text text="EffectiveLength" />
+<has_text text="TPM" />
+<has_text text="NM_001168316" />
+<has_text text="NM_174914" />
+<has_text text="NM_018953" />
+<has_text text="NR_003084" />
+<has_text text="NM_017410" />
+<has_text text="NM_153693" />
+<has_text text="NR_031764" />
+<has_n_columns n="5" />
+</assert_contents>
+</output>
+<output name="output_gene_quant"> <!-- Gene-level table: must contain the header names, the identifiers "baz" and "bar" (presumably gene names from gene_map.tab; TODO confirm), the values 2283 and 1640, and exactly 5 columns. -->
+<assert_contents>
+<has_text text="EffectiveLength" />
+<has_text text="TPM" />
+<has_text text="baz" />
+<has_text text="bar" />
+<has_text text="2283" />
+<has_text text="1640" />
+<has_n_columns n="5" />
+</assert_contents>
+</output>
+</test>
+<test> <!-- Paired-end run where both mates are supplied as a single paired collection; the transcript FASTA is taken from the history and useEM is set to True. -->
+<param name="single_or_paired_opts" value="paired_collection" />
+<param name="input_1"> <!-- Paired collection: "forward" = reads_1.fastq, "reverse" = reads_2.fastq, both declared as fastqsanger. -->
+<collection type="paired">
+<element name="forward" value="reads_1.fastq" ftype="fastqsanger" />
+<element name="reverse" value="reads_2.fastq" ftype="fastqsanger" />
+</collection>
+</param>
+<param name="TranscriptSource" value="history" />
+<param name="ownFile" value="transcripts.fasta" ftype="fasta" />
+<param name="geneMap" value="gene_map.tab" ftype="tabular" />
+<param name="useEM" value="True" />
+<output name="output_quant"> <!-- Transcript-level table: must contain the EffectiveLength/TPM header names, the listed transcript IDs, and exactly 5 columns. -->
+<assert_contents>
+<has_text text="EffectiveLength" />
+<has_text text="TPM" />
+<has_text text="NM_001168316" />
+<has_text text="NM_174914" />
+<has_text text="NM_018953" />
+<has_text text="NR_003084" />
+<has_text text="NM_017410" />
+<has_text text="NM_153693" />
+<has_text text="NR_031764" />
+<has_n_columns n="5" />
+</assert_contents>
+</output>
+<output name="output_gene_quant"> <!-- Gene-level table: must contain the header names, the identifiers "baz" and "bar" (presumably gene names from gene_map.tab; TODO confirm), the values 2283 and 1640, and exactly 5 columns. -->
+<assert_contents>
+<has_text text="EffectiveLength" />
+<has_text text="TPM" />
+<has_text text="baz" />
+<has_text text="bar" />
+<has_text text="2283" />
+<has_text text="1640" />
+<has_n_columns n="5" />
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+**What it does**
+Salmon is a tool for transcript quantification from RNA-seq data.  It
+requires a set of target transcripts (either from a reference or de-novo
+assembly) to quantify.  All you need to run Salmon is a fasta file containing
+your reference transcripts and a (set of) fasta/fastq file(s) containing your
+reads.  Salmon runs in two phases; indexing and quantification.  The indexing
+step is independent of the reads, and only needs to be run once for a particular
+set of reference transcripts and choice of k (the k-mer size). The
+quantification step, obviously, is specific to the set of RNA-seq reads and is
+thus run more frequently.
+The quantification output contains a number of columns:
+(1) Transcript ID,
+(2) Transcript Length,
+(3) Effective Length (the computed effective length of the transcript),
+(4) Transcripts per Million (TPM) and
+(5) Estimated number of reads (an estimate of the number of reads drawn from this transcript given the transcript’s relative abundance and length).
+The first two columns are self-explanatory, the third is the length used when estimating abundance, the fourth is a measure of transcript abundance and the final is a commonly used input for differential expression tools.
+The Transcripts per Million quantification number is computed as described in [1], and is meant as an estimate of the number of transcripts, per million observed transcripts,
+originating from each isoform. Its benefit over the F/RPKM measure is that it is independent of the mean expressed transcript length
+(i.e. if the mean expressed transcript length varies between samples, this alone can affect differential analysis based on the F/RPKM).
+Fragment Library Types
+======================
+There are numerous library preparation protocols for RNA-seq that result in
+sequencing reads with different characteristics.  For example, reads can be
+single end (only one side of a fragment is recorded as a read) or paired-end
+(reads are generated from both ends of a fragment).  Further, the sequencing
+reads themselves may be unstranded or strand-specific.  Finally, paired-end
+protocols will have a specified relative orientation.  To characterize the
+various different types of sequencing libraries, we've created a miniature
+"language" that allows for the succinct description of the many different types
+of possible fragment libraries.  For paired-end reads, the possible
+orientations, along with a graphical description of what they mean, are
+illustrated below:
+.. image:: ReadLibraryIllustration.png
+The library type string consists of three parts: the relative orientation of
+the reads, the strandedness of the library, and the directionality of the
+reads.
+The first part of the library string (relative orientation) is only provided if
+the library is paired-end. The possible options are:
+::
+I = inward
+O = outward
+M = matching
+The second part of the read library string specifies whether the protocol is
+stranded or unstranded; the options are:
+::
+S = stranded
+U = unstranded
+If the protocol is unstranded, then we're done.  The final part of the library
+string specifies the strand from which the read originates in a strand-specific
+protocol — it is only provided if the library is stranded (i.e. if the
+library format string is of the form S).  The possible values are:
+::
+F = read 1 (or single-end read) comes from the forward strand
+R = read 1 (or single-end read) comes from the reverse strand
+So, for example, if you wanted to specify a fragment library of strand-specific
+paired-end reads, oriented toward each other, where read 1 comes from the
+forward strand and read 2 comes from the reverse strand, you would specify ``-l
+ISF`` on the command line.  This designates that the library being processed has
+the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranded
+(the protocol is strand-specific), **F**\ orward (read 1 comes from the forward
+strand).
+The single-end library strings are a bit simpler than their paired-end
+counterparts, since there is no relative orientation of which to speak.  Thus, the
+only possible library format types for single-end reads are ``U`` (for
+unstranded), ``SF`` (for strand-specific reads coming from the forward strand)
+and ``SR`` (for strand-specific reads coming from the reverse strand).
+A few more examples of some library format strings and their interpretations are:
+::
+IU (an unstranded paired-end library where the reads face each other)
+::
+SF (a stranded single-end protocol where the reads come from the forward strand)
+::
+OSR (a stranded paired-end protocol where the reads face away from each other,
+read1 comes from reverse strand and read2 comes from the forward strand)
+.. note:: Correspondence to TopHat library types
+The popular `TopHat <http://ccb.jhu.edu/software/tophat/index.shtml>`_ RNA-seq
+read aligner has a different convention for specifying the format of the library.
+Below is a table that provides the corresponding Salmon (and Sailfish) library format
+string for each of the potential TopHat library types:
++---------------------+-------------------------+
+| TopHat              | Salmon (and Sailfish)   |
++=====================+============+============+
+|                     | Paired-end | Single-end |
++---------------------+------------+------------+
+|``-fr-unstranded``   |``-l IU``   |``-l U``    |
++---------------------+------------+------------+
+|``-fr-firststrand``  |``-l ISR``  |``-l SR``   |
++---------------------+------------+------------+
+|``-fr-secondstrand`` |``-l ISF``  |``-l SF``   |
++---------------------+------------+------------+
+The remaining salmon library format strings are not directly expressible in terms
+of the TopHat library types, and so there is no direct mapping for them.
+]]> </help>
+<citations>
+<citation type="doi">10.1101/021592</citation>
+</citations>
+</tool>

Mercurial > repos > bgruening > salmon

comparison old/salmon.xml @ 11:666bb48b1007 draft