changeset 5:c23da6257d6a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 2082c018009fa73c4afee8313febab13bb807ea8"
author iuc
date Wed, 16 Oct 2019 05:24:45 -0400 (2019-10-16)
parents 58b278def57e
children 178bdbdb6d24
files rg_rnaStarSolo.xml
diffstat 1 files changed, 147 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/rg_rnaStarSolo.xml	Fri Sep 06 11:10:22 2019 -0400
+++ b/rg_rnaStarSolo.xml	Wed Oct 16 05:24:45 2019 -0400
@@ -2,7 +2,7 @@
     <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>
     <macros>
         <import>macros.xml</import>
-        <token name="@WRAPPER@"></token>
+        <token name="@WRAPPER@">1</token>
     </macros>
     <expand macro="requirements"/>
     <expand macro="stdio" >
@@ -14,13 +14,52 @@
     STAR
     @REFGENOMEHANDLING@
 
-    ## cDNA sequence always goes first, then barcode
+    ## Check that the input pairs are of the same type
+    ## otherwise STARsolo will run for a long time and then error out.
+    ## We consume either repeats of two inputs R1 + R2
+    ## or a collection of paired reads.
+
+    #try
+        #set $last = None
+        #for $x in $input_types.input_repeats:
+            #if str($input_types.use) == "repeat":
+                #set $r1 = $x.input1
+                #set $r2 = $x.input2
+            #elif str($input_types.use) == "list_paired":
+                #set $r1 = $x.forward
+                #set $r2 = $x.reverse
+            #else
+                Wrong Type
+                #stop
+            #end if
+
+            #assert $r1.datatype == $r2.datatype
+
+            ## Test that all pairs are of the same type
+            #if $last:
+                #assert $last.datatype == $r1.datatype
+            #end if
+            #set $last = $r1
+        #end for
+    #except AssertionError
+        Input types are not the same!
+        #stop
+    #end try
+
+    ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
+    ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
+    #if str($input_types.use) == "repeat":
+        #set $reads2 = ','.join([ '%s' % $x.input2 for $i,$x in enumerate($input_types.input_repeats)])
+        #set $reads1 = ','.join([ '%s' % $x.input1 for $i,$x in enumerate($input_types.input_repeats)])
+    #else if str($input_types.use) == "list_paired"
+        #set $reads2 = ','.join([ '%s' % $x.reverse for $i,$x in enumerate($input_types.input_repeats)])
+        #set $reads1 = ','.join([ '%s' % $x.forward for $i,$x in enumerate($input_types.input_repeats)])
+    #end if
+
     --readFilesIn
-    #set $reads2 = ','.join([ '%s' % $x.input2 for $i,$x in enumerate($input_repeats)])
-    #set $reads1 = ','.join([ '%s' % $x.input1 for $i,$x in enumerate($input_repeats)])
     $reads2 $reads1
 
-    #if $input_repeats[0].input1.is_of_type('fastq.gz', 'fastqsanger.gz'):
+    #if $last.is_of_type('fastq.gz', 'fastqsanger.gz'):
         @FASTQ_GZ_OPTION@
     #end if
 
@@ -29,8 +68,8 @@
 
     ## 1 - check length of barcode, 0 - do not check
     ## Good for checking custom chemistries
-    --soloBarcodeReadLength 1
     --soloCBwhitelist '$soloCBwhitelist'
+    --soloBarcodeReadLength '$solo.soloBarcodeReadLength'
 
     #if str($solo.params.chemistry) == "CR2":
     --soloCBstart 1
@@ -54,10 +93,21 @@
     --soloUMIdedup '$solo.soloUMIdedup'
     ]]></command>
     <inputs>
-        <repeat name="input_repeats" title="Input Pairs" min="1" >
-            <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/>
-            <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
-        </repeat>
+        <conditional name="input_types" > <!-- selects how barcode (R1) and cDNA (R2) reads are supplied -->
+            <param name="use" type="select" label="Input Type" >
+                <option value="repeat" >Single files</option>
+                <option value="list_paired" >List of Pairs</option>
+            </param>
+            <when value="repeat">
+                <repeat name="input_repeats" title="Input Pairs" min="1" >
+                    <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/>
+                    <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
+                </repeat>
+            </when>
+            <when value="list_paired"> <!-- must be a list of pairs: the command template iterates this parameter and reads .forward/.reverse from each element -->
+                <param name="input_repeats" collection_type="list:paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
+            </when>
+        </conditional>
         <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" />
         <expand macro="refgenomehandling" />
         <section name="solo" title="Advanced Settings" expanded="true">
@@ -91,6 +141,7 @@
                 <option value="1MM_Directional" >Directional</option>
                 <option value="1MM_NotCollapsed" >None</option>
             </param>
+            <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." />
         </section>
     </inputs>
     <outputs>
@@ -112,10 +163,13 @@
     </outputs>
     <tests>
         <test expect_num_outputs="5">
-            <repeat name="input_repeats" >
-                <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-            </repeat>
+            <conditional name="input_types">
+                <param name="use" value="repeat" />
+                <repeat name="input_repeats" >
+                    <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                </repeat>
+            </conditional>
             <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -153,10 +207,13 @@
             </output>
         </test>
         <test expect_num_outputs="5">
-            <repeat name="input_repeats" >
-                <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-            </repeat>
+            <conditional name="input_types">
+                <param name="use" value="repeat" />
+                <repeat name="input_repeats" >
+                    <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                </repeat>
+            </conditional>
             <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -188,18 +245,78 @@
             </output>
         </test>
         <test expect_num_outputs="5">
-            <repeat name="input_repeats" >
-                <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-            </repeat>
-            <repeat name="input_repeats" >
-                <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-            </repeat>
-            <repeat name="input_repeats" >
-                <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
-            </repeat>
+            <!-- Multiple repeats test -->
+            <conditional name="input_types">
+                <param name="use" value="repeat" />
+                <repeat name="input_repeats" >
+                    <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                </repeat>
+                <repeat name="input_repeats" >
+                    <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                </repeat>
+                <repeat name="input_repeats" >
+                    <param name="input1" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                </repeat>
+            </conditional>
+            <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="SNORD83B.22.fa" />
+                <param name="genomeSAindexNbases" value="4" />
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" value="with-gtf" />
+                    <param name="sjdbOverhang" value="75" />
+                    <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <conditional name="params">
+                    <param name="chemistry" value="custom" />
+                    <param name="soloCBstart" value="1" />
+                    <param name="soloCBlen" value="16" />
+                    <param name="soloUMIstart" value="17" />
+                    <param name="soloUMIlen" value="10" />
+                </conditional>
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="GeneFull" />
+                <param name="soloUMIdedup" value="1MM_Directional" />
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <has_line line="TTTGTCATCTTAGAGC" />
+                    <has_line line="TTTGTCATCTTTCCTC" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="5">
+            <!-- Same as the test before but with a collection of pairs -->
+            <conditional name="input_types">
+                <param name="use" value="list_paired" />
+                <param name="input_repeats" >
+                    <collection type="list:paired">
+                        <element name="Pair1">
+                            <collection type="paired">
+                                <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                                <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                            </collection>
+                        </element>
+                        <element name="Pair2">
+                            <collection type="paired">
+                                <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                                <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" />
+                            </collection>
+                        </element>
+                        <!-- Planemo does not support more than 2 elements in a list of pairs -->
+                        <!-- <element name="Pair3"> -->
+                        <!--     <element name="forward" value="41737_R1_sub240k.fastq.gz" ftype="fastqsanger.gz" /> -->
+                        <!--     <element name="reverse" value="41737_R2_sub240k.fastq.gz" ftype="fastqsanger.gz" /> -->
+                        <!-- </element> -->
+                    </collection>
+                </param>
+            </conditional>
             <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />