Mercurial > repos > bgruening > sailfish
changeset 5:1b4ed566a41c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sailfish commit 03edb751808fef8bce744ebcbad5661a32373211
author | bgruening |
---|---|
date | Wed, 02 Nov 2016 10:30:36 -0400 |
parents | 03c74355227f |
children | 5bc9cd008ceb |
files | README.rst sailfish.tar.bz2 sailfish.xml test-data/gene_map.tab test-data/sailfish_bias_result1.tab test-data/sailfish_genMap_result1.tab test-data/sailfish_quant_result1.tab |
diffstat | 7 files changed, 185 insertions(+), 121 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Sun Sep 18 06:05:57 2016 -0400 +++ b/README.rst Wed Nov 02 10:30:36 2016 -0400 @@ -1,11 +1,10 @@ Galaxy wrappers for sailfish ===================================== -These wrappers are copyright 2014 by Björn Grüning and additional contributors. +These wrappers are copyright 2014 by Björn Grüning, Mhd Ramez Alrawas and additional contributors. All rights reserved. See the licence text below. -Currently tested with sailfish 0.6.3. - +Currently tested with sailfish 0.10.1 Automated Installation ====================== @@ -20,7 +19,8 @@ Version Changes -------- ---------------------------------------------------------------------- v0.6.3.0 - First release - +-------- ---------------------------------------------------------------------- +v0.10.1 - Second release ======== ======================================================================
--- a/sailfish.xml Sun Sep 18 06:05:57 2016 -0400 +++ b/sailfish.xml Wed Nov 02 10:30:36 2016 -0400 @@ -1,9 +1,5 @@ -<tool id="sailfish" name="Sailfish" version="0.7.6.1"> +<tool id="sailfish" name="Sailfish" version="0.10.1"> <description>transcript quantification from RNA-seq data</description> - <requirements> - <requirement type="package" version="0.7.6">sailfish</requirement> - <requirement type="package" version="1.57.0">boost</requirement> - </requirements> <macros> <xml name="strandedness"> <param name="strandedness" type="select" label="Specify the strandedness of the reads"> @@ -13,6 +9,9 @@ </param> </xml> </macros> + <requirements> + <requirement type="package" version="0.10.1">sailfish</requirement> + </requirements> <stdio> <exit_code range="1:" /> <exit_code range=":-1" /> @@ -23,7 +22,6 @@ <version_command>sailfish -version</version_command> <command> <![CDATA[ - #if $refTranscriptSource.TranscriptSource == "history": sailfish index --transcripts $refTranscriptSource.ownFile @@ -34,35 +32,26 @@ #else: #set $index_path = $refTranscriptSource.index.fields.path #end if - && - #if $single_or_paired.single_or_paired_opts == 'single': - #if $single_or_paired.input_singles.ext == 'fasta': #set $ext = 'fasta' #else: #set $ext = 'fastq' #end if - ln -s $single_or_paired.input_singles ./single.$ext && #else: - #if $single_or_paired.input_mate1.ext == 'fasta': #set $ext = 'fasta' #else: #set $ext = 'fastq' #end if - ln -s $single_or_paired.input_mate1 ./mate1.$ext && ln -s $single_or_paired.input_mate2 ./mate2.$ext && #end if - - #if $geneMap: ln -s "$geneMap" ./geneMap.$geneMap.ext && #end if - sailfish quant --index $index_path #if $single_or_paired.single_or_paired_opts == 'single': @@ -73,34 +62,51 @@ --mates2 ./mate2.$ext --libType "${single_or_paired.orientation}${single_or_paired.strandedness}" #end if - --output ./ + --output ./results $biasCorrect + $gcBiasCorrect --threads "\${GALAXY_SLOTS:-4}" - - #if $fldMean: + $dumpEq + #if str($gcSizeSamp): + 
--gcSizeSamp $gcSizeSamp + #end if + #if str($gcSpeedSamp): + --gcSpeedSamp $gcSpeedSamp + #end if + #if str($fldMean): --fldMean $fldMean #end if - - #if $fldSD: + #if str($fldSD): --fldSD $fldSD #end if - #if $maxReadOcc: --maxReadOcc $maxReadOcc #end if - #if $geneMap: --geneMap ./geneMap.${geneMap.ext} #end if - + $strictIntersect $noEffectiveLengthCorrection $useVBOpt - $allowOrphans - + $discardOrphans $unsmoothedFLD --maxFragLen ${maxFragLen} - --txpAggregationKey "${txpAggregationKey}" - + --txpAggregationKey '${txpAggregationKey}' + $ignoreLibCompat + $enforceLibCompat + $allowDovetail + #if str($numBiasSamples): + --numBiasSamples $numBiasSamples + #end if + #if str($numFragSamples): + --numFragSamples $numFragSamples + #end if + #if str($numGibbsSamples): + --numGibbsSamples $numGibbsSamples + #end if + #if str($numBootstraps): + --numBootstraps $numBootstraps + #end if ]]> </command> <inputs> @@ -118,7 +124,7 @@ </param> </when> <!-- build-in --> <when value="history"> - <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference transcriptome" help="in FASTA format" /> + <param name="ownFile" type="data" format="fasta" label="Select the reference transcriptome" help="in FASTA format" /> <param argument="kmerSize" type="integer" value="21" max="32" label="The size of the k-mer on which the index is built" help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, @@ -152,47 +158,109 @@ where each line contains the name of a transcript and the gene to which it belongs separated by a tab." 
/> <param argument="--biasCorrect" type="boolean" truevalue="--biasCorrect" falsevalue="" checked="False" - label="Perform bias correction" help=""/> + label="Perform sequence-specific bias correction" help=""/> + + <param argument="--gcBiasCorrect" type="boolean" truevalue="--gcBiasCorrect" falsevalue="" checked="False" + label="Perform fragment GC bias correction" help=""/> + + <param argument="--dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False" + label="Dump the equivalence class counts that were computed during quasi-mapping." help=""/> + + <param argument="--gcSizeSamp" type="integer" value="1" optional="True" + label="The value by which to down-sample transcripts when representing the GC content" + help="Larger values will reduce memory usage, but may decrease the fidelity of bias modeling results."/> + + <param argument="--gcSpeedSamp" type="integer" value="1" optional="True" + label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." + help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/> + + <param argument="--strictIntersect" type="boolean" truevalue="--strictIntersect" falsevalue="" checked="False" + label="Strict Intersect." 
help="When this flag is set, if the intersection of the + quasi-mappings for the left and right is empty, then all mappings for the left and all mappings + for the right read are reported as orphaned quasi-mappings."/> <param argument="--fldMean" type="integer" value="200" optional="True" label="Calculate effective lengths" - help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification - to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/> + help="If single end reads are being used for quantification, or there are an insufficient number of uniquely + mapping reads when performing paired-end quantification + to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/> <param argument="--fldSD" type="integer" value="80" optional="True" label="Standard deviation" - help="The standard deviation used in the fragment length distribution for single-end quantification or when an empirical distribution cannot be learned."/> + help="The standard deviation used in the fragment length distribution for single-end quantification or + when an empirical distribution cannot be learned."/> <param argument="--maxReadOcc" type="integer" value="200" optional="True" label="Maximal read mapping occurrence" help="Reads mapping to more than this many places won't be considered."/> <param argument="--noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False" - label="Disable effective length correction" help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. + label="Disable effective length correction" help="Disables effective length correction when computing the probability + that a fragment was generated from a transcript. 
If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/> <param argument="--useVBOpt" type="boolean" truevalue="--useVBOpt" falsevalue="" checked="False" - label="Use Variational Bayesian EM algorithm for optimization" help=""/> + label="Use Variational Bayesian EM algorithm for optimization" help="Use Variational Bayesian EM algorithm rather + than the traditional EM algorithm for optimization"/> - <param argument="--allowOrphans" type="boolean" truevalue="--allowOrphans" falsevalue="" checked="False" - label="Consider orphaned reads as valid hits when performing lightweight-alignment" - help="This option will increase sensitivity (allow more reads to map and more transcripts to be detected), but may decrease specificity as orphaned alignments are more likely to be spurious."/> + <param argument="--discardOrphans" type="boolean" truevalue="--discardOrphans" falsevalue="" checked="False" + label="Discard orphaned reads as valid hits when performing lightweight-alignment" + help="This option will discard orphaned fragments. 
This only has an effect on paired-end input, but enabling this option will discard, rather than count, any reads where only one of the paired fragments maps to a transcript."/> <param argument="--unsmoothedFLD" type="boolean" truevalue="--unsmoothedFLD" falsevalue="" checked="False" - label="Use the un-smoothed approach to effective length correction" help="This traditional approach works by convolving the FLD with the characteristic function over each transcript."/> + label="Use the un-smoothed approach to effective length correction" help="This traditional approach works by convolving the FLD with the + characteristic function over each transcript."/> <param argument="--maxFragLen" type="integer" value="1000" optional="True" label="The maximum length of a fragment to consider when building the empirical fragment length distribution" help=""/> - <param argument="--txpAggregationKey" value="gene_id" type="text" label="The key for aggregating transcripts during gene-level estimates" - help="The default is the gene_id field, but other fields (e.g. gene_name) might be useful depending on the specifics of the annotation being used." /> + <param argument="--txpAggregationKey" value="gene_id" type="text" label="The key for aggregating transcripts during gene-level estimates"> + <help> + <![CDATA[ + When generating the gene-level estimates, use the provided key for aggregating transcripts. The default is the "gene_id" field, + but other fields (e.g. "gene_name") might be useful depending on the specifics of the annotation being used. 
Note: this option only + affects aggregation when using a GTF annotation; not an annotation in "simple" format.]]> + </help> + </param> + <param argument="--ignoreLibCompat" type="boolean" truevalue="--ignoreLibCompat" falsevalue="" checked="False" + label="Disables strand-aware processing completely."> + <help> + <![CDATA[ + All hits are considered "Valid".]]> + </help> + </param> + <param argument="--enforceLibCompat" type="boolean" truevalue="--enforceLibCompat" falsevalue="" checked="False" + label="Enforces strict library compatibility."> + <help> + <![CDATA[ + Fragments that map in a manner other than what is specified by the expected library type will be discarded, + even if there are no mappings that agree with the expected library type.]]> + </help> + </param> + <param argument="--allowDovetail" type="boolean" truevalue="--allowDovetail" falsevalue="" checked="False" + label="Allow paired-end reads from the same fragment to dovetail."> + <help> + <![CDATA[ + Allow paired-end reads from the same fragment to "dovetail", such that the ends of the mapped reads can extend past each other.]]> + </help> + </param> + <param argument="--numBiasSamples" type="integer" value="1000000" optional="True" + label="Number of fragment mappings to use when learning the sequence-specific bias model" + help=""/> + <param argument="--numFragSamples" type="integer" value="10000" optional="True" + label="Number of fragments from unique alignments to sample when building the fragment length distribution" + help=""/> + <param argument="--numGibbsSamples" type="integer" value="0" optional="True" + label="Number of Gibbs sampling rounds to perform." + help=""/> + <param argument="--numBootstraps" type="integer" value="0" optional="True" + label="Number of bootstrap samples to generate." 
+ help="This is mutually exclusive with Gibbs"/> + </inputs> - </inputs> + <outputs> - <data name="output_quant" format="tabular" from_work_dir="quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> - <data name="output_bias_corrected_quant" format="tabular" from_work_dir="quant_bias_corrected.sf" label="${tool.name} on ${on_string} (Bias corrected Quantification)"> - <filter>biasCorrect is True</filter> - </data> - <data name="output_gene_quant" format="tabular" from_work_dir="quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)"> - <filter>geneMap is True</filter> + <data name="output_quant" format="tabular" from_work_dir="results/quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> + <data name="output_gene_quant" format="tabular" from_work_dir="results/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)"> + <filter>geneMap</filter> </data> </outputs> <tests> @@ -200,15 +268,33 @@ <param name="single_or_paired_opts" value="paired" /> <param name="input_mate1" value="reads_1.fastq" /> <param name="input_mate2" value="reads_2.fastq" /> - <param name="biasCorrect" value="True" /> + <param name="biasCorrect" value="False" /> <param name="TranscriptSource" value="history" /> <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> <output file="sailfish_quant_result1.tab" ftype="tabular" name="output_quant" /> - <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_bias_corrected_quant" /> + </test> + <test> + <param name="single_or_paired_opts" value="paired" /> + <param name="input_mate1" value="reads_1.fastq" /> + <param name="input_mate2" value="reads_2.fastq" /> + <param name="biasCorrect" value="True" /> + <param name="TranscriptSource" value="history" /> + <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> + <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_quant" /> + </test> + <test> + <param name="single_or_paired_opts" 
value="paired" /> + <param name="input_mate1" value="reads_1.fastq" /> + <param name="input_mate2" value="reads_2.fastq" /> + <param name="biasCorrect" value="True" /> + <param name="TranscriptSource" value="history" /> + <param name="ownFile" value="transcripts.fasta" ftype="fasta" /> + <param name="geneMap" value="gene_map.tab" ftype="tabular" /> + <output file="sailfish_bias_result1.tab" ftype="tabular" name="output_quant" /> + <output file="sailfish_genMap_result1.tab" ftype="tabular" name="output_gene_quant" /> </test> </tests> - <help> -<![CDATA[ + <help><![CDATA[ **What it does** @@ -336,6 +422,8 @@ of the TopHat library types, and so there is no direct mapping for them. -]]> - </help> + ]]></help> + <citations> + <citation type="doi">10.1038/nbt.2862</citation> + </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_map.tab Wed Nov 02 10:30:36 2016 -0400 @@ -0,0 +1,3 @@ +NM_174914 foo +NM_001168316 bar +NR_003084 baz
--- a/test-data/sailfish_bias_result1.tab Sun Sep 18 06:05:57 2016 -0400 +++ b/test-data/sailfish_bias_result1.tab Wed Nov 02 10:30:36 2016 -0400 @@ -1,32 +1,16 @@ -# sailfish (quasi-mapping-based) v0.7.6 -# [ program ] => sailfish -# [ command ] => quant -# [ index ] => { ./index_dir } -# [ mates1 ] => { ./mate1.fastq } -# [ mates2 ] => { ./mate2.fastq } -# [ libType ] => { IU } -# [ output ] => { ./ } -# [ biasCorrect ] => { } -# [ threads ] => { 1 } -# [ fldMean ] => { 200 } -# [ fldSD ] => { 80 } -# [ maxReadOcc ] => { 200 } -# [ maxFragLen ] => { 1000 } -# [ txpAggregationKey ] => { gene_id } -# [ mapping rate ] => { 100% } -# Name Length TPM NumReads -NM_022658 2288 378838 4881 -NM_174914 2385 111257 1500.04 -NM_017410 2396 3099.5 42 -NM_018953 1612 26168.3 228 -NM_001168316 2283 12398.5 159.361 -NM_004503 1681 36198.8 330.806 -NR_003084 1640 0 0 -NM_173860 849 240218 962 -NM_006897 1541 80244.3 664 -NM_153693 2072 6430.57 74.2815 -NR_031764 1853 10254.5 104.595 -NM_014620 2300 45132.7 584.838 -NM_153633 1666 40578.4 367.074 -NM_014212 2037 4852.08 55 -NM_017409 1959 4330.19 47 +Name Length EffectiveLength TPM NumReads +NM_001168316 2283 1528.95 12702.4 158.926 +NM_174914 2385 1599.63 114719 1501.66 +NR_031764 1853 1214.33 10407.1 103.415 +NM_004503 1681 1085.83 37300.1 331.428 +NM_006897 1541 984.724 82401.9 664 +NM_014212 2037 1316.12 5106.81 55 +NM_014620 2300 1541.27 46908.6 591.628 +NM_017409 1959 1273.24 4510.99 47 +NM_017410 2396 1562.29 3285.28 42 +NM_018953 1612 1019.15 27338 227.994 +NM_022658 2288 1634.87 364846 4881 +NM_153633 1666 1082.85 40694.7 360.597 +NM_153693 2072 1374.67 6520.1 73.3448 +NM_173860 849 483.271 243258 962 +NR_003084 1640 1052.77 1.09566 0.00943897
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sailfish_genMap_result1.tab Wed Nov 02 10:30:36 2016 -0400 @@ -0,0 +1,5 @@ +Name Length EffectiveLength TPM NumReads +baz 1640 1052.77 1.09566 0.00943897 +NR_031764 1853 1214.33 10407.1 103.415 +foo 348.949 235.269 976889 9737.65 +bar 2283 1528.95 12702.4 158.926
--- a/test-data/sailfish_quant_result1.tab Sun Sep 18 06:05:57 2016 -0400 +++ b/test-data/sailfish_quant_result1.tab Wed Nov 02 10:30:36 2016 -0400 @@ -1,32 +1,16 @@ -# sailfish (quasi-mapping-based) v0.7.6 -# [ program ] => sailfish -# [ command ] => quant -# [ index ] => { ./index_dir } -# [ mates1 ] => { ./mate1.fastq } -# [ mates2 ] => { ./mate2.fastq } -# [ libType ] => { IU } -# [ output ] => { ./ } -# [ biasCorrect ] => { } -# [ threads ] => { 1 } -# [ fldMean ] => { 200 } -# [ fldSD ] => { 80 } -# [ maxReadOcc ] => { 200 } -# [ maxFragLen ] => { 1000 } -# [ txpAggregationKey ] => { gene_id } -# [ mapping rate ] => { 100% } -# Name Length TPM NumReads -NM_001168316 2283 12398.5 159.361 -NM_174914 2385 111257 1500.04 -NR_031764 1853 10254.5 104.595 -NM_004503 1681 36198.8 330.806 -NM_006897 1541 80244.3 664 -NM_014212 2037 4852.08 55 -NM_014620 2300 45132.7 584.838 -NM_017409 1959 4330.19 47 -NM_017410 2396 3099.5 42 -NM_018953 1612 26168.3 228 -NM_022658 2288 378838 4881 -NM_153633 1666 40578.4 367.074 -NM_153693 2072 6430.57 74.2815 -NM_173860 849 240218 962 -NR_003084 1640 0 0 +Name Length EffectiveLength TPM NumReads +NM_001168316 2283 2082.61 12552.5 161.366 +NM_174914 2385 2184.61 111020 1497.1 +NR_031764 1853 1652.61 10345.6 105.535 +NM_004503 1681 1480.61 36162.7 330.503 +NM_006897 1541 1340.61 80240.2 664 +NM_014212 2037 1836.61 4851.45 55 +NM_014620 2300 2099.61 45082 584.273 +NM_017409 1959 1758.61 4329.67 47 +NM_017410 2396 2195.61 3098.99 42 +NM_018953 1612 1411.61 26165.8 227.994 +NM_022658 2288 2087.61 378779 4881 +NM_153633 1666 1465.61 40626.6 367.539 +NM_153693 2072 1871.61 6464.46 74.683 +NM_173860 849 648.611 240280 962 +NR_003084 1640 1439.61 1.04309 0.00926914