changeset 16:49121db48873 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 26e9f1627e91a4be6bdc7e71cd44f1ea1701ee6f"
author bgruening
date Thu, 22 Jul 2021 14:05:28 +0000
parents cc05793bb896
children c8903f357804
files macros.xml salmonquant.xml test-data/postSample.bam vpolo_convert.py
diffstat 4 files changed, 122 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Sat Oct 03 18:22:45 2020 +0000
+++ b/macros.xml	Thu Jul 22 14:05:28 2021 +0000
@@ -1,24 +1,18 @@
 <macros>
-    <token name="@VERSION@">1.3.0</token>
+    <token name="@VERSION@">1.5.1</token>
     <token name="@GALAXY_VERSION@">galaxy0</token>
     <token name="@IDX_VERSION@">q7</token>
+    <token name="@PROFILE_VERSION@">20.01</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">salmon</requirement>
             <requirement type="package" version="1.3">seqtk</requirement>
-            <requirement type="package" version="1.10">samtools</requirement>
+            <requirement type="package" version="1.12">samtools</requirement>
             <requirement type="package" version="0.2.0">vpolo</requirement>
-            <requirement type="package" version="1.0.3">pandas</requirement>
-            <requirement type="package" version="1.4.1">scipy</requirement>
+            <requirement type="package" version="1.3.0">pandas</requirement>
+            <requirement type="package" version="1.7.0">scipy</requirement>
         </requirements>
     </xml>
-    <xml name="stranded">
-        <param name="strandedness" type="select" label="Specify the strandedness of the reads">
-                <option value="U" selected="True">Not stranded (U)</option>
-                <option value="SF">read comes from the forward strand (SF)</option>
-                <option value="SR">read comes from the reverse strand (SR)</option>
-        </param>
-    </xml>
     <xml name="orient">
         <param name="orientation" type="select" label="Relative orientation of reads within a pair">
             <option value="M">Mates are oriented in the same direction (M = matching)</option>
@@ -26,6 +20,40 @@
             <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option>
         </param>
     </xml>
+    <xml name="stranded">
+        <conditional name="libtype" >
+            <param name="strandedness" type="select" label="Specify the strandedness of the reads" help="--libtype">
+                <option value="A" selected="true">Infer automatically (A)</option>
+                <option value="U">Not stranded (U)</option>
+                <option value="SF">read comes from the forward strand (SF)</option>
+                <option value="SR">read comes from the reverse strand (SR)</option>
+            </param>
+            <yield/>
+        </conditional>
+    </xml>
+    <xml name="stranded_se">
+        <expand macro="stranded">
+            <when value="A"/>
+            <when value="U"/>
+            <when value="SF"/>
+            <when value="SR"/>
+        </expand>
+    </xml>
+    <xml name="stranded_pe">
+        <expand macro="stranded">
+            <when value="A">
+            </when>
+            <when value="U">
+                <expand macro="orient"/>
+            </when>
+            <when value="SF">
+                <expand macro="orient"/>
+            </when>
+            <when value="SR">
+                <expand macro="orient"/>
+            </when>
+        </expand>
+    </xml>
     <xml name="index">
         <conditional name="refTranscriptSource">
             <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?"
@@ -63,23 +91,20 @@
                 </param>
                 <when value="single">
                     <param name="input_singles" type="data" format="fastq,fasta,fastq.gz,fastq.bz2" label="FASTQ/FASTA file" help="FASTQ file." />
-                    <expand macro="stranded"/>
+                    <expand macro="stranded_se"/>
                 </when>
                 <when value="paired">
                     <param name="input_mate1" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 1" help="FASTQ file." />
                     <param name="input_mate2" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 2" help="FASTQ file." />
-                    <expand macro="orient"/>
-                    <expand macro="stranded"/>
+                    <expand macro="stranded_pe"/>
                 </when>
                 <when value="paired_collection">
                     <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
-                    <expand macro="orient"/>
-                    <expand macro="stranded"/>
+                    <expand macro="stranded_pe"/>
                 </when>
                 <when value="paired_interleaved">
                    <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
-                   <expand macro="orient"/>
-                   <expand macro="stranded"/>
+                   <expand macro="stranded_pe"/>
                 </when>
             </conditional>
         </section>
@@ -140,9 +165,10 @@
         help="[Experimental]: The fraction of the read that must be covered by MMPs (of length >= 31) if this read is to be considered as 'mapped'. This may help to avoid 'spurious' mappings. A value of 0 (the default) denotes no coverage threshold (a single 31-mer can yield a mapping). Since coverage by exact matching, large, MMPs is a rather strict condition, this value should likely be set to something low, if used."/>
     </xml>
     <xml name="align">
-        <param name="afile" type="data" format="bam" label="Alignment file"/>
+        <param name="afile" type="data" format="qname_input_sorted.bam,qname_sorted.bam" label="Alignment file"/>
+        <param argument="--ont" type="boolean" truevalue="--ont" falsevalue="" label="Is this Alignment file an Oxford Nanopore Technologies (ONT) dataset?" help="Select this for both cDNA and direct RNA ONT datasets. Enables an alignment error model designed to work with long-read alignments and disables the length effect in the generative model when computing the conditional probability of observing a fragment given that it arises from a specific transcript. This is because in long-read sequencing, we do not expect to observe (i.e. sequence) multiple fragments from the same molecule, and thus we do not expect the transcript length to directly affect the observed fragment count."/>
         <param name="transcript" type="data" format="fasta,fa" label="Transcript file"/>
-        <expand macro="stranded"/>
+        <expand macro="stranded_se"/>
         <param name="discardOrphans" type="boolean" truevalue="--discardOrphans" falsevalue="" checked="False"
         label="Discard orphans"
         help="Discard orphan alignments in the input. If this flag is passed, then only paired alignments will be considered toward quantification estimates. The default behavior is to consider orphan alignments if no valid paired mappings exist."/>
@@ -230,7 +256,7 @@
             label="Use the traditional EM algorithm for optimization in the batch passes."
             help=""/>
             <param name="rangeFactorizationBins" type="integer" value="0" label="Range of factorization bins"
-            help="Factorizes the likelihood used in quantification by adopting a new notionof equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts. This is a more fine-grained factorization than the normal rich equivalence classes. The default value (0) corresponds to the standard rich equivalence classes, and larger values imply a more fine-grained factorization. If range factorization is enabled, a common value to select for this parameter is 4."/>
+            help="Factorizes the likelihood used in quantification by adopting a new notion of equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts. This is a more fine-grained factorization than the normal rich equivalence classes. The default value (0) corresponds to the standard rich equivalence classes, and larger values imply a more fine-grained factorization. If range factorization is enabled, a common value to select for this parameter is 4."/>
             <param name="numGibbsSamples" type="integer" value="0" optional="True"
             label="Number of Gibbs sampling rounds to perform."
             help="" />
@@ -360,7 +386,7 @@
         salmon quant
             --index '$index_path'
             #if $quant_type.input.single_or_paired.single_or_paired_opts == 'single':
-                --libType ${quant_type.input.single_or_paired.strandedness}
+                --libType ${quant_type.input.single_or_paired.libtype.strandedness}
                 #if $compressed == 'GZ':
                     --unmatedReads <(zcat < ./single.$ext)
                 #else if $compressed == 'BZ2':
@@ -369,7 +395,12 @@
                     --unmatedReads ./single.$ext
                 #end if
             #else:
-                --libType '${quant_type.input.single_or_paired.orientation}${quant_type.input.single_or_paired.strandedness}'
+                --libType
+                #if $quant_type.input.single_or_paired.libtype.strandedness == 'A'
+                    A
+                #else
+                    ${quant_type.input.single_or_paired.libtype.orientation}${quant_type.input.single_or_paired.libtype.strandedness}
+                #end if
                 #if $quant_type.input.single_or_paired.single_or_paired_opts == 'paired_interleaved':
                     #if $compressed == 'BZ2':
                         --mates1 <(bzcat < ./mate1.$ext | seqtk seq -1)
@@ -435,8 +466,9 @@
         #end if
         salmon quant
         -t '${quant_type.transcript}'
-        -l '${quant_type.strandedness}'
+        -l '${quant_type.libtype.strandedness}'
         -a '${quant_type.afile}'
+        $quant_type.ont
         --threads "\${GALAXY_SLOTS:-4}"
         ${quant_type.discardOrphans}
         ${quant_type.noErrorModel}
--- a/salmonquant.xml	Sat Oct 03 18:22:45 2020 +0000
+++ b/salmonquant.xml	Thu Jul 22 14:05:28 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="salmon" name="Salmon quant" version="@VERSION@+galaxy1">
+<tool id="salmon" name="Salmon quant" version="@VERSION@+@GALAXY_VERSION@" profile="@PROFILE_VERSION@">
     <description>Perform dual-phase, reads or mapping-based estimation of transcript abundance from RNA-seq reads</description>
     <macros>
         <import>macros.xml</import>
@@ -59,10 +59,14 @@
                     </section>
                 </conditional>
                 <section name="input">
-                    <param name="single_or_paired.single_or_paired_opts" value="paired" />
-                    <param name="single_or_paired.input_mate1" value="fastqs/reads_1.fastq" />
-                    <param name="single_or_paired.input_mate2" value="fastqs/reads_2.fastq" />
-                    <param name="single_or_paired.strandedness" value="U"/>
+                    <conditional name="single_or_paired">
+                        <param name="single_or_paired_opts" value="paired" />
+                        <param name="input_mate1" value="fastqs/reads_1.fastq" />
+                        <param name="input_mate2" value="fastqs/reads_2.fastq" />
+                        <conditional name="libtype">
+                            <param name="strandedness" value="U"/>
+                        </conditional>
+                    </conditional>
                 </section>
                 <param name="writeMappings" value="true"/>
             </conditional>
@@ -80,6 +84,43 @@
                     <has_n_columns n="5" />
                 </assert_contents>
             </output>
+            <assert_command>
+                <has_text text="--libType IU"/>
+            </assert_command>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="quant_type">
+                <param name="qtype" value="reads"/>
+                <conditional name="refTranscriptSource">
+                    <param name="TranscriptSource" value="history"/>
+                    <section name="s_index">
+                        <param name="fasta" value="transcripts.fasta"/>
+                    </section>
+                </conditional>
+                <section name="input">
+                    <param name="single_or_paired.single_or_paired_opts" value="paired" />
+                    <param name="single_or_paired.input_mate1" value="fastqs/reads_1.fastq" />
+                    <param name="single_or_paired.input_mate2" value="fastqs/reads_2.fastq" />
+                </section>
+                <param name="writeMappings" value="true"/>
+            </conditional>
+            <output name="output_quant" ftype="tabular">
+                <assert_contents>
+                    <has_text text="EffectiveLength" />
+                    <has_text text="TPM" />
+                    <has_text text="NM_001168316" />
+                    <has_text text="NM_174914" />
+                    <has_text text="NM_018953" />
+                    <has_text text="NR_003084" />
+                    <has_text text="NM_017410" />
+                    <has_text text="NM_153693" />
+                    <has_text text="NR_031764" />
+                    <has_n_columns n="5" />
+                </assert_contents>
+            </output>
+            <assert_command>
+                <has_text text="--libType A"/>
+            </assert_command>
         </test>
         <test expect_num_outputs="1">
             <conditional name="quant_type">
@@ -152,11 +193,30 @@
                 <param name="noErrorModel" value="--noErrorModel"/>
                 <param name="numErrorBins" value="5"/>
             </conditional>
-            <output name="postSample" ftype="bam" value="postSample.bam"/>
+            <!-- bam output uses non-deterministic sampling, so can only use compare="sim_size" -->
+            <output name="postSample" ftype="bam" value="postSample.bam" compare="sim_size" delta="20000"/>
             <assert_stderr>
                 <has_text text="Sampling alignments; outputting results to ./output/postSample.bam"/>
             </assert_stderr>
         </test>
+        <test expect_num_outputs="2">
+            <conditional name="quant_type">
+                <param name="qtype" value="alignment"/>
+                <param name="afile" value="salmonbam.bam"/>
+                <param name="ont" value="true" />
+                <param name="transcript" value="transcripts.fasta"/>
+                <param name="sampleOut" value="--sampleOut"/>
+                <param name="sampleUnaligned" value="--sampleUnaligned"/>
+                <param name="noErrorModel" value="--noErrorModel"/>
+                <param name="numErrorBins" value="5"/>
+            </conditional>
+            <assert_stderr>
+                <has_text text="Sampling alignments; outputting results to ./output/postSample.bam"/>
+            </assert_stderr>
+            <assert_command>
+                <has_text text=" --ont "/>
+            </assert_command>
+        </test>
         <test expect_num_outputs="1">
             <conditional name="quant_type">
                 <param name="qtype" value="reads"/>
Binary file test-data/postSample.bam has changed
--- a/vpolo_convert.py	Sat Oct 03 18:22:45 2020 +0000
+++ b/vpolo_convert.py	Thu Jul 22 14:05:28 2021 +0000
@@ -4,7 +4,6 @@
 
 from vpolo.alevin import parser as par
 
-
 parser = argparse.ArgumentParser()
 parser.add_argument("--mtx", "-m", action="store_true", help="--dumpMtx flag set")
 parser.add_argument("--umi", "-u", action="store_true", help="--dumpUmiGraph flag set")