changeset 9:ec9cbd6b9a49 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 00c545ddbf0f008903f4b4c11d476e6089c3f531"
author iuc
date Fri, 15 Jan 2021 17:39:11 +0000
parents 00fbfac99d39
children a6fba3d92531
files macros.xml rg_rnaStarSolo.xml test-data/indrop.R1.fastq.gz test-data/indrop.R2.fastq.gz test-data/indrop.barcodes1.txt test-data/indrop.barcodes2.txt test-data/smartseq.cellids.txt test-data/smartseq1.R1.fastq.gz test-data/smartseq1.R2.fastq.gz test-data/smartseq2.R1.fastq.gz test-data/smartseq2.R2.fastq.gz test-data/smartseq3.R1.fastq.gz test-data/smartseq3.R2.fastq.gz test-data/smartseq4.R1.fastq.gz test-data/smartseq4.R2.fastq.gz test-data/smartseq5.R1.fastq.gz test-data/smartseq5.R2.fastq.gz test-data/smartseq6.R1.fastq.gz test-data/smartseq6.R2.fastq.gz test-data/smartseq7.R1.fastq.gz test-data/smartseq7.R2.fastq.gz test-data/smartseq8.R1.fastq.gz test-data/smartseq8.R2.fastq.gz test-data/smartseq9.R1.fastq.gz test-data/smartseq9.R2.fastq.gz
diffstat 25 files changed, 1373 insertions(+), 149 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Dec 04 22:05:36 2020 +0000
+++ b/macros.xml	Fri Jan 15 17:39:11 2021 +0000
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually adjust the +galaxy
     version number. -->
     <!-- STAR version to be used -->
-    <token name="@VERSION@">2.7.6a</token>
+    <token name="@VERSION@">2.7.7a</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
     by the current version.
@@ -33,7 +33,7 @@
             <edam_operation>operation_0292</edam_operation>
         </edam_operations>
     </xml>
-    
+
     <xml name="index_selection" token_with_gene_model="0">
         <param argument="--genomeDir" name="genomeDir" type="select"
         label="Select reference genome"
@@ -132,6 +132,35 @@
         #end if
         #end if
         ]]></token>
+    <token name="@READSHANDLING@" ><![CDATA[
+    ## Build the STARsolo read arguments from either two parallel multi-file
+    ## selections (R1 + R2, each joined with commas) or one paired collection.
+    ## Multi-file pairs are asserted to share a datatype, otherwise STARsolo will run
+    ## for a long time and then error out. NOTE(review): zip() stops at the shorter list.
+    #if str($sc.input_types.use) == "repeat":
+        #set $reads1 = []
+        #set $reads2 = []
+        #for $r1, $r2 in zip($sc.input_types.input1, $sc.input_types.input2):
+            #assert $r1.datatype == $r2.datatype
+            #silent $reads1.append(str($r1))
+            #silent $reads2.append(str($r2))
+        #end for
+        #set $reads1 = ','.join($reads1)
+        #set $reads2 = ','.join($reads2)
+    #elif str($sc.input_types.use) == "list_paired":
+        #set $r1 = $sc.input_types.input_collection.forward
+        #set $r2 = $sc.input_types.input_collection.reverse
+        #set $reads1 = $r1
+        #set $reads2 = $r2
+    #end if
+    ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
+    ## see: Section 3.2 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
+    --readFilesIn $reads2 $reads1
+    --soloCBmatchWLtype $sc.soloCBmatchWLtype
+    #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'):
+        @FASTQ_GZ_OPTION@
+    #end if
+    ]]></token>
     <xml name="ref_selection">
         <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" />
         <!-- Currently, this parameter is not exposed in the wrapper,
@@ -148,4 +177,53 @@
             <yield />
         </stdio>
     </xml>
+    <xml name="input_selection"> <!-- Barcode (R1) and cDNA (R2) read inputs: two multi-file selections or one paired collection -->
+        <conditional name="input_types" >
+            <param name="use" type="select" label="Input Type" >
+                <option value="repeat" >Separate barcode and cDNA reads</option>
+                <option value="list_paired" >Paired collection of barcode and cDNA reads</option>
+            </param>
+            <when value="repeat">
+                <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data"  multiple="true"
+                label="RNA-Seq FASTQ/FASTA file, Barcode reads" />
+                <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data"  multiple="true"
+                label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
+            </when>
+            <when value="list_paired">
+                <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="input_selection_smart_seq"> <!-- Smart-Seq read inputs: a list collection of single-end files or a list:paired collection -->
+        <conditional name="input_types_smart_seq" >
+            <param name="use" type="select" label="Input Type" >
+                <option value="list_single_end" >Single-end FASTQ collection</option>
+                <option value="list_paired_end" >Paired FASTQ collection</option>
+            </param>
+            <when value="list_single_end">
+                <param name="single_end_collection" collection_type="list" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of single-end FASTQ files" />
+            </when>
+            <when value="list_paired_end">
+                <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of paired-end FASTQ files" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="umidedup_options"> <!-- Shared 1-mismatch choices expanded into the soloUMIdedup selects; 1MM_All is preselected -->
+        <option value="1MM_All" selected="true">All</option>
+        <option value="1MM_Directional" >Directional</option>
+    </xml>
+    <xml name="anchor_types"> <!-- Anchor codes for the barcode/UMI anchor selects: 0 read start, 1 read end, 2 adapter start, 3 adapter end -->
+        <option value="0">Read start</option>
+        <option value="1">Read end</option>
+        <option value="2">Adapter start</option>
+        <option value="3">Adapter end</option>
+    </xml>
+    <xml name="cb_match_wl_common"> <!-- soloCBmatchWLtype choices offered for both the simple and the complex barcode layouts -->
+        <option value="Exact" >Exact</option>
+        <option value="1MM" >Single match</option>
+    </xml>
+    <xml name="cb_match_wl_cellranger"> <!-- Additional soloCBmatchWLtype choices allowing multiple whitelist matches; 1MM_multi is preselected -->
+        <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2)</option>
+        <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3)</option>
+    </xml>
 </macros>
--- a/rg_rnaStarSolo.xml	Fri Dec 04 22:05:36 2020 +0000
+++ b/rg_rnaStarSolo.xml	Fri Jan 15 17:39:11 2021 +0000
@@ -17,68 +17,89 @@
     STAR
     @REFGENOMEHANDLING@
 
-    --readFilesIn
-    ## Check that the input pairs are of the same type
-    ## otherwise STARsolo will run for a long time and then error out.
-    ## We consume either repeats of two inputs R1 + R2
-    ## or a collection of paired reads.
+    ## Supports Drop-seq, 10X Chromium, inDrop and Smart-Seq
+    --soloType $sc.solo_type
 
-    #if str($input_types.use) == "repeat":
-        #set $reads1 = []
-        #set $reads2 = []
-        #for $r1, $r2 in zip($input_types.input1, $input_types.input2):
-            #assert $r1.datatype == $r2.datatype
-            #silent $reads1.append(str($r1))
-            #silent $reads2.append(str($r2))
-        #end for
-        #set $reads1 = ','.join($reads1)
-        #set $reads2 = ','.join($reads2)
-    #elif str($input_types.use) == "list_paired":
-        #set $r1 = $input_types.input_collection.forward
-        #set $r2 = $input_types.input_collection.reverse
-        #set $reads1 = $r1
-        #set $reads2 = $r2
-    #end if
-
-    ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
-    ## see: Section 3.1 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
-    $reads2 $reads1
-
-    #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'):
-        @FASTQ_GZ_OPTION@
-    #end if
-
-    ## Droplet is the only mode available for now
-    --soloType Droplet
-
+    #if str($sc.solo_type) == "CB_UMI_Simple":
+    @READSHANDLING@
+    --soloCBwhitelist '$sc.soloCBwhitelist'
     ## 1 - check length of barcode, 0 - do not check
     ## Good for checking custom chemistries
-    --soloCBwhitelist '$soloCBwhitelist'
-    --soloBarcodeReadLength $solo.soloBarcodeReadLength
-
-    #if str($solo.params.chemistry) == "CR2":
+    --soloBarcodeReadLength $sc.soloBarcodeReadLength
+    #if str($sc.params.chemistry) == "CR2":
     --soloCBstart 1
     --soloCBlen 16
     --soloUMIstart 17
     --soloUMIlen 10
-    #else if str($solo.params.chemistry) == "CR3":
+    #else if str($sc.params.chemistry) == "CR3":
     --soloCBstart 1
     --soloCBlen 16
     --soloUMIstart 17
     --soloUMIlen 12
-    #else if str($solo.params.chemistry) == "custom":
-    --soloCBstart $solo.params.soloCBstart
-    --soloCBlen $solo.params.soloCBlen
-    --soloUMIstart $solo.params.soloUMIstart
-    --soloUMIlen $solo.params.soloUMIlen
+    #else if str($sc.params.chemistry) == "custom":
+    --soloCBstart $sc.params.soloCBstart
+    --soloCBlen $sc.params.soloCBlen
+    --soloUMIstart $sc.params.soloUMIstart
+    --soloUMIlen $sc.params.soloUMIlen
     #end if
 
+    #elif str($sc.solo_type) == "CB_UMI_Complex":
+    @READSHANDLING@
+    ## inDrop supports multiple cell barcodes of varying length
+        #set $cb_whitelist = []
+        #set $cb_pos = []
+        #for $cb in $sc.cb_whitelists:
+            #silent $cb_whitelist.append(str($cb.whitelist_file))
+            #silent $cb_pos.append('_'.join([str($cb.cb_start_anchor), str($cb.cb_start_anchor_pos),str($cb.cb_end_anchor), str($cb.cb_end_anchor_pos)]))
+        #end for
+    #set $cb_whitelist = ' '.join($cb_whitelist)
+    --soloCBwhitelist $cb_whitelist
+    #set $cb_pos = ' '.join($cb_pos)
+    --soloCBposition $cb_pos
+    #set $umi_pos = '_'.join([str($sc.umi_start_anchor), str($sc.umi_start_anchor_pos), str($sc.umi_end_anchor), str($sc.umi_end_anchor_pos)])
+    --soloUMIposition $umi_pos
+    --soloAdapterSequence $sc.soloAdapterSequence
+    --soloAdapterMismatchesNmax $sc.soloAdapterMismatchesNmax
+
+    #elif str($sc.solo_type) == "SmartSeq":
+    ## Create a manifest file with fastq files and their corresponding cell-ids
+    ## For Smart-Seq [R1] is followed by [R2]
+    --readFilesManifest '$manifest_file'
+        #set $read_files_command = ""
+        #if str($sc.input_types_smart_seq.use) == "list_single_end":
+            #if $sc.input_types_smart_seq.single_end_collection[0].is_of_type('fastq.gz', 'fastqsanger.gz'):
+                @FASTQ_GZ_OPTION@
+            #end if
+        #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
+            #if $sc.input_types_smart_seq.paired_end_collection[0].forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
+                @FASTQ_GZ_OPTION@
+            #end if
+        #end if
+    --soloCBwhitelist None
+    #end if
+
+    --soloUMIfiltering $solo.soloUMIfiltering
     --soloStrand $solo.soloStrand
     --soloFeatures $solo.soloFeatures
-    --soloUMIdedup $solo.soloUMIdedup
+    --soloUMIdedup $sc.soloUMIdedup
     --quantMode TranscriptomeSAM
     --outSAMtype BAM Unsorted
 
+    #if str($solo.filter.filter_type) == "cellranger2":
+    --soloCellFilter CellRanger2.2 $solo.filter.n_expected $solo.filter.max_perc $solo.filter.max_min_ratio
+    #else if str($solo.filter.filter_type) == "topcells":
+    --soloCellFilter TopCells $solo.filter.n_cells
+    #else if str($solo.filter.filter_type) == "no_filter":
+    --soloCellFilter None
+    #end if
+    ## Splice junctions are always under "raw" directory
+
+    --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}'
+    ## Rename the selected features directory
+    && mv Solo.out/${solo.soloFeatures} Solo.out/soloFeatures
+    ## put the barcodes and features stats into a single file
+    && cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}'
+
     ## BAM sorting (logic copied from samtools_sort wrapper)
     ## choosing BAM SortedByCoord appeared once to give fewer reads
     ## than BAM Unsorted followed by a samtools sort
@@ -92,26 +113,30 @@
     addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
     ((addmemory=addmemory*75/100)) &&
     samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
-
     ]]></command>
+    <configfiles> <!-- manifest_file is filled for Smart-Seq only: one tab-separated line per sample holding read 1, read 2 (a single dash when single-end) and ID: plus the cell id; the number of cell ids must equal the number of collection elements -->
+        <configfile name="manifest_file" >
+    #if str($sc.solo_type) == "SmartSeq":
+        #set $cellids_fh = open(str($sc.cell_ids), 'r')
+        #set $cellids = [str(x.strip()) for x in $cellids_fh.readlines()]
+        #silent $cellids_fh.close()
+        #set $samples = []
+        #if str($sc.input_types_smart_seq.use) == "list_single_end":
+            #assert len($cellids) == len($sc.input_types_smart_seq.single_end_collection.keys())
+            #for $i,$r1 in enumerate($sc.input_types_smart_seq.single_end_collection):
+                #silent $samples.append('\t'.join([str($r1), '-', 'ID:' + $cellids[$i]]))
+            #end for
+        #elif str($sc.input_types_smart_seq.use) == "list_paired_end":
+            #assert len($cellids) == len($sc.input_types_smart_seq.paired_end_collection.keys())
+            #for $i,($r1,$r2) in enumerate($sc.input_types_smart_seq.paired_end_collection):
+                #silent $samples.append('\t'.join([str($r1), str($r2), 'ID:' + $cellids[$i]]))
+            #end for
+        #end if
+        #echo '\n'.join($samples)
+    #end if
+        </configfile>
+    </configfiles>
     <inputs>
-        <conditional name="input_types" >
-            <param name="use" type="select" label="Input Type" >
-                <option value="repeat" >Separate barcode and cDNA reads</option>
-                <option value="list_paired" >Paired collection of barcode and cDNA reads</option>
-            </param>
-            <when value="repeat">
-                <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data"  multiple="true"
-                label="RNA-Seq FASTQ/FASTA file, Barcode reads" />
-                <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data"  multiple="true"
-                label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
-            </when>
-            <when value="list_paired">
-                <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
-            </when>
-        </conditional>
-        <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" />
-
         <!-- Genome source. -->
         <conditional name="refGenomeSource">
             <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
@@ -140,23 +165,83 @@
                 <expand macro="@SJDBOPTIONS@" optional="false"/>
             </when>
         </conditional>
-
-        <section name="solo" title="Advanced Settings" expanded="true">
-            <conditional name="params">
-                <param name="chemistry" type="select" label="Configure Chemistry Options">
-                    <option value="CR2" selected="true">Cell Ranger v2</option>
-                    <option value="CR3">Cell Ranger v3</option>
-                    <option value="custom">Custom</option>
+        <conditional name="sc" >
+            <param name="solo_type" type="select" label="Type of single-cell RNA-seq" >
+                <option value="CB_UMI_Simple">Drop-seq or 10X Chromium</option>
+                <option value="CB_UMI_Complex">inDrop</option>
+                <option value="SmartSeq">Smart-Seq</option>
+            </param>
+            <when value="CB_UMI_Simple">
+                <expand macro="input_selection" />
+                <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
+                <conditional name="params" >
+                    <param name="chemistry" type="select" label="Configure Chemistry Options">
+                        <option value="CR2" selected="true">Cell Ranger v2</option>
+                        <option value="CR3">Cell Ranger v3</option>
+                        <option value="custom">Custom</option>
+                    </param>
+                    <when value="CR2" />
+                    <when value="CR3" />
+                    <when value="custom" >
+                        <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
+                        <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
+                        <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
+                        <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
+                    </when>
+                </conditional>
+                <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." />
+                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                    <expand macro="umidedup_options" />
+                    <option value="Exact" >Exact</option>
+                </param>
+                <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
+    CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes.">
+                    <expand macro="cb_match_wl_common" />
+                    <expand macro="cb_match_wl_cellranger" />
                 </param>
-                <when value="CR2" />
-                <when value="CR3" />
-                <when value="custom" >
-                    <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
-                    <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
-                    <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
-                    <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
-                </when>
-            </conditional>
+            </when>
+            <when value="CB_UMI_Complex">
+                <expand macro="input_selection" />
+                <repeat name="cb_whitelists" title="Cell barcode whitelist information" max="2" > <!-- One entry per cell barcode: its whitelist file plus start/end anchors and positions, which the command joins as startAnchor_startPos_endAnchor_endPos for soloCBposition -->
+                    <param name="whitelist_file" format="txt,tsv" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
+                    <param name="cb_start_anchor" type="select" label="Start anchor base for cell barcode">
+                        <expand macro="anchor_types" />
+                    </param>
+                    <param name="cb_start_anchor_pos" type="integer" value="0" label="0-based position of the CB start with respect to the anchor base" />
+                    <param name="cb_end_anchor" type="select" label="End anchor base for cell barcode">
+                        <expand macro="anchor_types" />
+                    </param>
+                    <param name="cb_end_anchor_pos" type="integer" value="0" label="0-based position of the CB end with respect to the anchor base" />
+                </repeat>
+                <param name="umi_start_anchor" type="select" label="Start anchor base for UMI">
+                    <expand macro="anchor_types" />
+                </param>
+                <param name="umi_start_anchor_pos" type="integer" value="0" label="0-based position of the UMI start with respect to the anchor base" />
+                <param name="umi_end_anchor" type="select" label="End anchor base for UMI">
+                    <expand macro="anchor_types" />
+                </param>
+                <param name="umi_end_anchor_pos" type="integer" value="0" label="0-based position of the UMI end with respect to the anchor base" />
+                <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." />
+                <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence" />
+                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                    <expand macro="umidedup_options" />
+                    <option value="Exact" >Exact</option>
+                </param>
+                <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
+    CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used to choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_multi, but pseudocounts of 1 are added to all whitelist barcodes.">
+                    <expand macro="cb_match_wl_common" />
+                </param>
+            </when>
+            <when value="SmartSeq">
+                <expand macro="input_selection_smart_seq" />
+                <param name="cell_ids" type="data" label="File containing cell IDs of the samples. One ID per line in order of samples in the above collection."/>
+                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                    <option value="Exact" >Exact</option>
+                    <option value="NoDedup">Do not deduplicate UMIs</option>
+                </param>
+            </when>
+        </conditional>
+        <section name="solo" title="Advanced Settings" expanded="true">
             <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule">
                 <option value="Unstranded" />
                 <option value="Forward" selected="true" />
@@ -167,42 +252,86 @@
                 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option>
                 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option>
             </param>
-            <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, None has UMIs with 1 mismatch distance to others not collapsed">
-                <option value="1MM_All" selected="true">All</option>
-                <option value="1MM_Directional" >Directional</option>
-                <option value="1MM_NotCollapsed" >None</option>
+            <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
+                <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
+                <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene (introduced in CellRanger 3.x.x)</option>
             </param>
-            <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." />
+            <conditional name="filter" >
+                <param name="filter_type" type="select" label="Cell filtering type and parameters" >
+                    <option value="cellranger2" selected="true" >Simple filtering of CellRanger v2</option>
+                    <option value="topcells" >Filter top N cells</option>
+                    <option value="no_filter" >Do not filter</option>
+                </param>
+                <when value="cellranger2" >
+                    <param name="n_expected" type="integer" min="1" value="3000" label="Number of expected cells" />
+                    <param name="max_perc" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" />
+                    <param name="max_min_ratio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" />
+                </when>
+                <when value="topcells" >
+                    <param name="n_cells" type="integer" min="1" value="3000" label="Number of top cells to report sorted by UMI count" />
+                </when>
+                <when value="no_filter" />
+            </conditional>
+            <param argument="--soloOutFormatFeaturesGeneField3" type="text" value="Gene Expression" label="Field 3 in the Genes output." help="Input '-' to remove the 3rd column from the output." />
         </section>
     </inputs>
     <outputs>
         <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
             <expand macro="dbKeyActions" />
         </data>
-        <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes"
-              from_work_dir="Solo.out/Gene/filtered/features.tsv" />
-        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes"
-              from_work_dir="Solo.out/Gene/filtered/barcodes.tsv" />
-        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts"
-              from_work_dir="Solo.out/Gene/filtered/matrix.mtx" >
-            <filter>solo['soloFeatures'] == "Gene" </filter>
+<!--
+        <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" />
+        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" />
+        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" >
+            <expand macro="dbKeyActions" />
+        </data>
+-->
+        <!-- soloCellFilter set to None, if SJ is selected for soloFeatures -->
+        <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes raw"
+              from_work_dir="Solo.out/soloFeatures/raw/features.tsv" >
+              <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
+        </data>
+        <data format="tsv" name="output_genes_filtered" label="${tool.name} on ${on_string}: Genes filtered"
+              from_work_dir="Solo.out/soloFeatures/filtered/features.tsv" >
+              <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
+        </data>
+        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw"
+              from_work_dir="Solo.out/soloFeatures/raw/barcodes.tsv" >
+              <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
+        </data>
+        <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered"
+              from_work_dir="Solo.out/soloFeatures/filtered/barcodes.tsv" >
+              <filter>solo['filter']['filter_type'] != "no_filter" and solo['soloFeatures'] != "SJ" </filter>
+        </data>
+        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts raw"
+              from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
+            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] == "no_filter" </filter>
+            <expand macro="dbKeyActions" />
+        </data>
+        <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Gene Counts filtered"
+              from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
+            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] != "no_filter" </filter>
             <expand macro="dbKeyActions" />
         </data>
         <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts"
-              from_work_dir="Solo.out/Gene/filtered/matrixSJ.mtx" >
+              from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
             <filter>solo['soloFeatures'] == "SJ" </filter>
             <expand macro="dbKeyActions" />
         </data>
-        <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts"
-              from_work_dir="Solo.out/Gene/filtered/matrixGeneFull.mtx" >
-            <filter>solo['soloFeatures'] == "GeneFull" </filter>
+        <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts raw"
+              from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
+            <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] == "no_filter" </filter>
+            <expand macro="dbKeyActions" />
+        </data>
+        <data format="mtx" name="output_matrixGeneFull_filtered" label="${tool.name} on ${on_string}: Matrix Full Gene Counts filtered"
+              from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
+            <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] != "no_filter" </filter>
             <expand macro="dbKeyActions" />
         </data>
         <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" >
             <expand macro="dbKeyActions" />
         </data>
-        <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries"
-              from_work_dir="Solo.out/Gene/Features.stats" />
+        <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/>
     </outputs>
     <!-- Generating test data that is big enough for STARsolo to detect and small enough
          for Galaxy to test requires careful modification of input FASTA and GTF data,
@@ -214,12 +343,6 @@
     -->
     <tests>
         <test expect_num_outputs="6">
-            <conditional name="input_types">
-                <param name="use" value="repeat" />
-                <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
-            </conditional>
-            <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -227,19 +350,31 @@
                 <param name="sjdbOverhang" value="100" />
                 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
             </conditional>
-            <section name="solo" >
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
                     <param name="chemistry" value="CR3" />
                 </conditional>
+                <param name="soloUMIdedup" value="1MM_All" />
+            </conditional>
+            <section name="solo" >
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
-                <param name="soloUMIdedup" value="1MM_All" />
             </section>
             <output name="output_barcodes" >
                 <assert_contents>
                     <!-- first and last line -->
-                    <has_line line="ACACCGGTCTAACGGT" />
-                    <has_line line="TTCTCAATCCACGTTC" />
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
                 </assert_contents>
             </output>
             <output name="output_genes">
@@ -250,8 +385,8 @@
             </output>
             <output name="output_matrix" >
                 <assert_contents>
-                    <has_line_matching expression="14\s+7\s+7" />
-                    <has_line_matching expression="4\s+7\s+1" />
+                    <has_line_matching expression="14\s+394\s+7" />
+                    <has_line_matching expression="4\s+381\s+1" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
@@ -263,12 +398,6 @@
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
         </test>
         <test expect_num_outputs="6"><!-- same as above, but using custom -->
-            <conditional name="input_types">
-                <param name="use" value="repeat" />
-                <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
-            </conditional>
-            <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -276,7 +405,14 @@
                 <param name="sjdbOverhang" value="100" />
                 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
             </conditional>
-            <section name="solo" >
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
                     <param name="chemistry" value="custom" />
                     <param name="soloCBstart" value="1" />
@@ -284,23 +420,25 @@
                     <param name="soloUMIstart" value="17" />
                     <param name="soloUMIlen" value="12" />
                 </conditional>
+                <param name="soloUMIdedup" value="1MM_All" />
+            </conditional>
+            <section name="solo" >
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
-                <param name="soloUMIdedup" value="1MM_All" />
             </section>
-            <output name="output_barcodes" >
+            <output name="output_barcodes_filtered" >
                 <assert_contents>
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
                 </assert_contents>
             </output>
-            <output name="output_genes">
+            <output name="output_genes_filtered">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
                 </assert_contents>
             </output>
-            <output name="output_matrix" >
+            <output name="output_matrix_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+7\s+7" />
                     <has_line_matching expression="4\s+7\s+1" />
@@ -315,12 +453,6 @@
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
         </test>
         <test expect_num_outputs="6"><!-- Multiple repeats test -->
-            <conditional name="input_types">
-                <param name="use" value="repeat" />
-                <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
-                <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
-            </conditional>
-            <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -328,15 +460,24 @@
                 <param name="sjdbOverhang" value="100" />
                 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
             </conditional>
-            <section name="solo" >
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz,pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz,pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
                     <param name="chemistry" value="CR3" />
                 </conditional>
+                <param name="soloUMIdedup" value="1MM_All" />
+            </conditional>
+            <section name="solo" >
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
-                <param name="soloUMIdedup" value="1MM_All" />
             </section>
-            <output name="output_barcodes" >
+            <output name="output_barcodes_filtered" >
                 <assert_contents>
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
@@ -346,16 +487,97 @@
         </test>
         <test expect_num_outputs="6">
             <!-- Test with paired collection -->
-            <conditional name="input_types">
-                <param name="use" value="list_paired" />
-                <param name="input_collection" >
-                    <collection type="paired">
-                        <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
-                        <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
-                    </collection>
-                </param>
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="list_paired" />
+                    <param name="input_collection" >
+                        <collection type="paired">
+                            <element name="forward" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                            <element name="reverse" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <conditional name="params">
+                    <param name="chemistry" value="CR3" />
+                </conditional>
+                <param name="soloUMIdedup" value="1MM_All" />
+            </conditional>
+            <section name="solo" >
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="Gene" />
+            </section>
+            <output name="output_barcodes_filtered" >
+                <assert_contents>
+                    <has_line line="ACACCGGTCTAACGGT" />
+                    <has_line line="TTCTCAATCCACGTTC" />
+                </assert_contents>
+            </output>
+            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+        </test>
+        <test expect_num_outputs="6">
+            <!-- Test soloFeatures, soloCBmatchWLtype, soloCellFilter, soloOutFormatFeaturesGeneField3, soloUMIfiltering -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
             </conditional>
-            <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" />
+                <conditional name="params">
+                    <param name="chemistry" value="CR3" />
+                </conditional>
+                <param name="soloUMIdedup" value="1MM_All" />
+            </conditional>
+            <section name="solo" >
+                <param name="soloUMIfiltering" value="MultiGeneUMI" />
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="GeneFull" />
+                <conditional name="filter">
+                    <param name="filter_type" value="topcells" />
+                    <param name="n_cells" value="5" />
+                </conditional>
+                <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" />
+            </section>
+            <output name="output_barcodes_filtered" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AGACGTTCAAGGCTCC" />
+                    <has_line line="TCAACGAAGCTAGTGG" />
+                </assert_contents>
+            </output>
+            <output name="output_genes_filtered" >
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" />
+                </assert_contents>
+            </output>
+            <output name="output_matrixGeneFull_filtered" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+6\s+14" />
+                    <has_line_matching expression="10\s+6\s+1" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="6">
+            <!-- Test soloType CB_UMI_Complex -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -363,21 +585,168 @@
                 <param name="sjdbOverhang" value="100" />
                 <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
             </conditional>
-            <section name="solo" >
-                <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Complex" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="indrop.R1.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="indrop.R2.fastq.gz" ftype="fastqsanger.gz" />
                 </conditional>
-                <param name="soloStrand" value="Forward" />
-                <param name="soloFeatures" value="Gene" />
+                <repeat name="cb_whitelists" >
+                    <param name="whitelist_file" value="indrop.barcodes1.txt"/>
+                    <param name="cb_start_anchor" value="0" />
+                    <param name="cb_start_anchor_pos" value="0" />
+                    <param name="cb_end_anchor" value="2" />
+                    <param name="cb_end_anchor_pos" value="-1" />
+                </repeat>
+                <repeat name="cb_whitelists" >
+                    <param name="whitelist_file" value="indrop.barcodes2.txt"/>
+                    <param name="cb_start_anchor" value="3" />
+                    <param name="cb_start_anchor_pos" value="1" />
+                    <param name="cb_end_anchor" value="3" />
+                    <param name="cb_end_anchor_pos" value="8" />
+                </repeat>
+                <param name="umi_start_anchor" value="3" />
+                <param name="umi_start_anchor_pos" value="9" />
+                <param name="umi_end_anchor" value="3" />
+                <param name="umi_end_anchor_pos" value="14" />
+                <param name="soloAdapterSequence" value="GAGTGATTGCTTGTGACGCCTT"  />
+                <param name="soloAdapterMismatchesNmax" value="1" />
                 <param name="soloUMIdedup" value="1MM_All" />
-            </section>
-            <output name="output_barcodes" >
+                <param name="soloCBmatchWLtype" value="1MM" />
+            </conditional>
+            <output name="output_barcodes_filtered" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="ACAACGTGG_AAACCTCC" />
+                    <has_line line="ATTCCAGAC_TTCGCTGG" />
+                </assert_contents>
+            </output>
+            <output name="output_genes_filtered">
                 <assert_contents>
-                    <has_line line="ACACCGGTCTAACGGT" />
-                    <has_line line="TTCTCAATCCACGTTC" />
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+33\s+36" />
+                    <has_line_matching expression="2\s+33\s+1" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+nExactMatch\s+791" />
+                    <has_line_matching expression="\s+nUMIs\s+36" />
                 </assert_contents>
             </output>
-            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+        </test>
+        <test expect_num_outputs="6">
+            <!-- Test soloType SmartSeq -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="SmartSeq" />
+                <conditional name="input_types_smart_seq">
+                    <param name="use" value="list_paired_end" />
+                    <param name="paired_end_collection" >
+                        <collection type="list:paired">
+                            <element name="pair1">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq1.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq1.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair2">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq2.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq2.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair3">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq3.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq3.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair4">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq4.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq4.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair5">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq5.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq5.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair6">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq6.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq6.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair7">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq7.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq7.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair8">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq8.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq8.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                            <element name="pair9">
+                                <collection type="paired">
+                                    <element name="forward" value="smartseq9.R1.fastq.gz" ftype="fastqsanger.gz" />
+                                    <element name="reverse" value="smartseq9.R2.fastq.gz" ftype="fastqsanger.gz" />
+                                </collection>
+                            </element>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="cell_ids" value="smartseq.cellids.txt" />
+                <param name="soloUMIdedup" value="Exact" />
+            </conditional>
+            <section name="solo" >
+                <param name="soloStrand" value="Unstranded" />
+                <conditional name="filter">
+                    <param name="filter_type" value="topcells" />
+                    <param name="n_cells" value="2" />
+                </conditional>
+            </section>
+            <output name="output_barcodes_filtered" >
+                <assert_contents>
+                    <has_line line="CSC6_D02" />
+                    <not_has_text text="MGH26_A02" />
+                </assert_contents>
+            </output>
+            <output name="output_genes_filtered">
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+3\s+10" />
+                    <has_line_matching expression="12\s+3\s+1" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+nExactMatch\s+9000" />
+                    <has_line_matching expression="\s+nUMIs\s+32" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
Binary file test-data/indrop.R1.fastq.gz has changed
Binary file test-data/indrop.R2.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/indrop.barcodes1.txt	Fri Jan 15 17:39:11 2021 +0000
@@ -0,0 +1,384 @@
+
GTTTGTTT
+
ACCGTGTTT
+
GATAGTGTTT
+
TGAGGCGGTTT
+
GATCGTTT
+
ATCACGTTT
+
GATGTAGTTT
+
TGACACAGTTT
+
CTTTCTTT
+
AGCCTCTTT
+
GACGGGCTTT
+
TGAATGACTTT
+
TGCTATTT
+
ACGGAATTT
+
GACATTTGTT
+
TGAGTTCTGTT
+
CCGCTGTT
+
AAAATCGTT
+
GATTGGCGTT
+
TGACTACCGTT
+
GTAACGTT
+
AACTGAGTT
+
GAAGGCAGTT
+
TGACTGTTCTT
+
ACCTTCTT
+
AATACTCTT
+
GAGAAGGCTT
+
TGAAGGAGCTT
+
TCATCCTT
+
AAGCGCCTT
+
GAGGTCCCTT
+
TGACAATACTT
+
TTGGACTT
+
ACCCGACTT
+
GATCTCACTT
+
TGAGACAACTT
+
TCCTTATT
+
AGATGTATT
+
GAGTCATATT
+
TGAGCCGGATT
+
CTTCGATT
+
AGAACGATT
+
GAACGCCATT
+
TGACATACATT
+
ATCTTTGT
+
ACTACTTGT
+
GAAAGATTGT
+
TGACTTGGTGT
+
TTATCTGT
+
ATGGCCTGT
+
GACGAGATGT
+
TGAGTCCATGT
+
GGGTTGGT
+
ACCCTTGGT
+
GATCTGTGGT
+
TGAAAACTGGT
+
GCATGGGT
+
AAATCGGGT
+
GATTGAGGGT
+
TGATCGACGGT
+
CTTCAGGT
+
AGGGAAGGT
+
GAGAATTCGT
+
TGAGTCGTCGT
+
TTAAGCGT
+
ATGCTCCGT
+
GAACTGCCGT
+
TGATAACCCGT
+
CCAACCGT
+
AGTTTACGT
+
GACAATTAGT
+
TGACGGGTAGT
+
GCTCTAGT
+
AGTATGAGT
+
GATTCCGAGT
+
TGACCAGCAGT
+
TGACCAGT
+
AAGCGAAGT
+
GATGGTTTCT
+
TGACACTTTCT
+
AAGCTTCT
+
ATTGATTCT
+
GATGAGGTCT
+
TGACCTCGTCT
+
GTCTCTCT
+
AGCACCTCT
+
GAGCGTTGCT
+
TGATACGTGCT
+
GGCATGCT
+
AAGATGGCT
+
GAACCACGCT
+
TGAGTGGAGCT
+
TCGAAGCT
+
ATGTGTCCT
+
GACGACTCCT
+
TGATATTGCCT
+
TTCGGCCT
+
AAAACGCCT
+
GACAGTCCCT
+
TGATTTACCCT
+
GCTTACCT
+
AATATACCT
+
GAGGGAACCT
+
TGACCATTACT
+
TAACTACT
+
ATTGTGACT
+
GACACGGACT
+
TGAGAAGCACT
+
GTTCAACT
+
ACCGCAACT
+
GATACAAACT
+
TGACCTGTTAT
+
TAGCTTAT
+
AGGGTGTAT
+
GAGAGAGTAT
+
TGAACATCTAT
+
TTGCATAT
+
AACCCATAT
+
GACGATTGAT
+
TGATCCCTGAT
+
GGTGGGAT
+
AATGCGGAT
+
GAACTAGGAT
+
TGAAGCGCGAT
+
GTTACGAT
+
AGCCAAGAT
+
GAGTTGTCAT
+
TGACAAGTCAT
+
ATATGCAT
+
ACTCCGCAT
+
GAGAGCCCAT
+
TGACAGACCAT
+
CGGCACAT
+
AAAGGTAAT
+
GACGAATAAT
+
TGACTCAGAAT
+
ACTTCAAT
+
AGGGCCAAT
+
GAATGGAAAT
+
TGACAACAAAT
+
AATGTTTG
+
ACTGCGTTG
+
GAATTCCTTG
+
TGAAACCCTTG
+
GTACCTTG
+
ACTAGATTG
+
GAGAGAATTG
+
TGAAGGTTGTG
+
TACTTGTG
+
AGGTTAGTG
+
GAATCAAGTG
+
TGACGAGTCTG
+
CCCATCTG
+
AGCAACCTG
+
GATTAAACTG
+
TGATCGTCATG
+
GCAGCATG
+
AAATGAATG
+
GACCCGAATG
+
TGATAGAAATG
+
AGAGGTGG
+
ACAACGTGG
+
GACTGTCTGG
+
TGATTCGCTGG
+
TCATATGG
+
AGTGGATGG
+
GAGACGATGG
+
TGAATGCATGG
+
CTTACGGG
+
AAGAACGGG
+
GACAAGAGGG
+
TGAAAACAGGG
+
TGCAAGGG
+
AAAAGTCGG
+
GAGATCTCGG
+
TGACGTATCGG
+
ATTTCCGG
+
AAGCTACGG
+
GATAAGACGG
+
TGAAGCGTAGG
+
TAAATAGG
+
ATCATGAGG
+
GATGTAAAGG
+
TGAGACAAAGG
+
GAGTTTCG
+
ATCGGTTCG
+
GACTTCTTCG
+
TGAAAATGTCG
+
TAGCCTCG
+
ATTGGATCG
+
GATGCCATCG
+
TGATTAGTCCG
+
TACAGCCG
+
AACTCACCG
+
GATCGGTACG
+
TGAATTCGACG
+
GTTGCACG
+
AATCCCACG
+
GATGTACACG
+
TGAAACACACG
+
AGGCAACG
+
AACGAAACG
+
GAGGCGTTAG
+
TGATCCCGTAG
+
TAGTCTAG
+
ACGTGCTAG
+
GACCTACTAG
+
TGATGTTTGAG
+
GATGTGAG
+
ATTTGGGAG
+
GATGGAGGAG
+
TGATCACCGAG
+
CTATAGAG
+
AACGCAGAG
+
GACCCTTCAG
+
TGAACGCTCAG
+
CATCGCAG
+
ATCTAGCAG
+
GATGTTCCAG
+
TGAATACCCAG
+
TGCGACAG
+
AGGTCACAG
+
GATTTAACAG
+
TGACACAACAG
+
GGAAACAG
+
AGGCCTAAG
+
GAACACTAAG
+
TGACGTAGAAG
+
GGATAAAG
+
AAGTGAAAG
+
GAGTCCAAAG
+
TGATGTCTTTC
+
CGTATTTC
+
AATATCTTC
+
GATGGGATTC
+
TGAGCGCATTC
+
TTTGTGTC
+
ACAGGTGTC
+
GACGCTAGTC
+
TGAGGTTTCTC
+
TTCCGCTC
+
ACACTCCTC
+
GATGACCCTC
+
TGAGTACACTC
+
TGCGTATC
+
ATCTGCATC
+
GATAACCATC
+
TGAGCCACATC
+
CTTTAATC
+
AAAGTAATC
+
GATCCCAATC
+
TGAGGGAAATC
+
CAGTTTGC
+
ACTGAGTGC
+
GAAGTGATGC
+
TGACTCGATGC
+
GCTTTGGC
+
AATGTTGGC
+
GATACCAGGC
+
TGACACAAGGC
+
ATCAGCGC
+
AGTTACCGC
+
GAGAATACGC
+
TGATTGCACGC
+
AACTTAGC
+
AACGGTAGC
+
GACCCATAGC
+
TGACTACGAGC
+
GGAGAAGC
+
ATTCGTTCC
+
GAGGACTTCC
+
TGATCCAGTCC
+
AGAAGTCC
+
AAAACCTCC
+
GACTTACTCC
+
TGAAACAATCC
+
ACCTTGCC
+
AGAAGTGCC
+
GAATTGGGCC
+
TGATTGTCGCC
+
TTATAGCC
+
AGCAAAGCC
+
GACATCTCCC
+
TGAGTAATCCC
+
TGATGCCC
+
AAATGACCC
+
GACTAGACCC
+
TGAGATTTACC
+
TGGCTACC
+
ATTAGGACC
+
GAGAAAGACC
+
TGATCGACACC
+
GTGTAACC
+
ACCCTAACC
+
GATCTCAACC
+
TGATTGTTTAC
+
CGGCTTAC
+
ACAGATTAC
+
GAAAGCGTAC
+
TGAGTCCGTAC
+
ACGTATAC
+
AGTCAATAC
+
GACTCTTGAC
+
TGAGGTCTGAC
+
AACCTGAC
+
ATAGTGGAC
+
GATGACGGAC
+
TGAGCAAGGAC
+
GATTAGAC
+
ATTCCAGAC
+
GAAGGAAGAC
+
TGAGAGTTCAC
+
TGCCTCAC
+
ATTTATCAC
+
GAATGGGCAC
+
TGACTTCGCAC
+
AGCACCAC
+
AGGTGACAC
+
GACCTGACAC
+
TGACTAGTAAC
+
AGCAGAAC
+
ACGGACAAC
+
GATCGGTTTA
+
TGAAGAAGTTA
+
GGCCCTTA
+
AATGGATTA
+
GACCACATTA
+
TGAGCAGGGTA
+
GAGCGGTA
+
ACTTAGGTA
+
GAGGGAGGTA
+
TGACTCGCGTA
+
CGAACGTA
+
AATTCAGTA
+
GATTGATCTA
+
TGATGTGGCTA
+
ATCCGCTA
+
AAAAGCCTA
+
GACGTACCTA
+
TGAGGCTACTA
+
AGAGACTA
+
ACGTGGATA
+
GAGACAGATA
+
TGATTCACATA
+
CGCTAATA
+
ACCATTTGA
+
GACGCCTTGA
+
TGAGAGGCTGA
+
TGGTATGA
+
AAGCTATGA
+
GATGAAATGA
+
TGACTTCTGGA
+
TCCAGGGA
+
AGTGTCGGA
+
GAACAGCGGA
+
TGAATATAGGA
+
GCAGTCGA
+
AAAACTCGA
+
GAGATTGCGA
+
TGAATGACCGA
+
ACCCACGA
+
AGGGAACGA
+
GAAGTTTAGA
+
TGAGGAATAGA
+
AAATCAGA
+
AGTCAAAGA
+
GACCTATTCA
+
TGAAGGATTCA
+
CGACGTCA
+
ACGCTCTCA
+
GATGTGCTCA
+
TGACTGGTGCA
+
TACCGGCA
+
ATAGTCGCA
+
GACGTCAGCA
+
TGAATGAAGCA
+
CCCAAGCA
+
AGCTTTCCA
+
GATCCGTCCA
+
TGAACTAGCCA
+
AATTCCCA
+
AAGACACCA
+
GAGTTAACCA
+
TGATGATAACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/indrop.barcodes2.txt	Fri Jan 15 17:39:11 2021 +0000
@@ -0,0 +1,384 @@
+
GTTTGTTT
+
CCGTGTTT
+
TAGTGTTT
+
GGCGGTTT
+
GATCGTTT
+
TCACGTTT
+
TGTAGTTT
+
CACAGTTT
+
CTTTCTTT
+
GCCTCTTT
+
CGGGCTTT
+
ATGACTTT
+
TGCTATTT
+
CGGAATTT
+
CATTTGTT
+
GTTCTGTT
+
CCGCTGTT
+
AAATCGTT
+
TTGGCGTT
+
CTACCGTT
+
GTAACGTT
+
ACTGAGTT
+
AGGCAGTT
+
CTGTTCTT
+
ACCTTCTT
+
ATACTCTT
+
GAAGGCTT
+
AGGAGCTT
+
TCATCCTT
+
AGCGCCTT
+
GGTCCCTT
+
CAATACTT
+
TTGGACTT
+
CCCGACTT
+
TCTCACTT
+
GACAACTT
+
TCCTTATT
+
GATGTATT
+
GTCATATT
+
GCCGGATT
+
CTTCGATT
+
GAACGATT
+
ACGCCATT
+
CATACATT
+
ATCTTTGT
+
CTACTTGT
+
AAGATTGT
+
CTTGGTGT
+
TTATCTGT
+
TGGCCTGT
+
CGAGATGT
+
GTCCATGT
+
GGGTTGGT
+
CCCTTGGT
+
TCTGTGGT
+
AAACTGGT
+
GCATGGGT
+
AATCGGGT
+
TTGAGGGT
+
TCGACGGT
+
CTTCAGGT
+
GGGAAGGT
+
GAATTCGT
+
GTCGTCGT
+
TTAAGCGT
+
TGCTCCGT
+
ACTGCCGT
+
TAACCCGT
+
CCAACCGT
+
GTTTACGT
+
CAATTAGT
+
CGGGTAGT
+
GCTCTAGT
+
GTATGAGT
+
TTCCGAGT
+
CCAGCAGT
+
TGACCAGT
+
AGCGAAGT
+
TGGTTTCT
+
CACTTTCT
+
AAGCTTCT
+
TTGATTCT
+
TGAGGTCT
+
CCTCGTCT
+
GTCTCTCT
+
GCACCTCT
+
GCGTTGCT
+
TACGTGCT
+
GGCATGCT
+
AGATGGCT
+
ACCACGCT
+
GTGGAGCT
+
TCGAAGCT
+
TGTGTCCT
+
CGACTCCT
+
TATTGCCT
+
TTCGGCCT
+
AAACGCCT
+
CAGTCCCT
+
TTTACCCT
+
GCTTACCT
+
ATATACCT
+
GGGAACCT
+
CCATTACT
+
TAACTACT
+
TTGTGACT
+
CACGGACT
+
GAAGCACT
+
GTTCAACT
+
CCGCAACT
+
TACAAACT
+
CCTGTTAT
+
TAGCTTAT
+
GGGTGTAT
+
GAGAGTAT
+
ACATCTAT
+
TTGCATAT
+
ACCCATAT
+
CGATTGAT
+
TCCCTGAT
+
GGTGGGAT
+
ATGCGGAT
+
ACTAGGAT
+
AGCGCGAT
+
GTTACGAT
+
GCCAAGAT
+
GTTGTCAT
+
CAAGTCAT
+
ATATGCAT
+
CTCCGCAT
+
GAGCCCAT
+
CAGACCAT
+
CGGCACAT
+
AAGGTAAT
+
CGAATAAT
+
CTCAGAAT
+
ACTTCAAT
+
GGGCCAAT
+
ATGGAAAT
+
CAACAAAT
+
AATGTTTG
+
CTGCGTTG
+
ATTCCTTG
+
AACCCTTG
+
GTACCTTG
+
CTAGATTG
+
GAGAATTG
+
AGGTTGTG
+
TACTTGTG
+
GGTTAGTG
+
ATCAAGTG
+
CGAGTCTG
+
CCCATCTG
+
GCAACCTG
+
TTAAACTG
+
TCGTCATG
+
GCAGCATG
+
AATGAATG
+
CCCGAATG
+
TAGAAATG
+
AGAGGTGG
+
CAACGTGG
+
CTGTCTGG
+
TTCGCTGG
+
TCATATGG
+
GTGGATGG
+
GACGATGG
+
ATGCATGG
+
CTTACGGG
+
AGAACGGG
+
CAAGAGGG
+
AAACAGGG
+
TGCAAGGG
+
AAAGTCGG
+
GATCTCGG
+
CGTATCGG
+
ATTTCCGG
+
AGCTACGG
+
TAAGACGG
+
AGCGTAGG
+
TAAATAGG
+
TCATGAGG
+
TGTAAAGG
+
GACAAAGG
+
GAGTTTCG
+
TCGGTTCG
+
CTTCTTCG
+
AAATGTCG
+
TAGCCTCG
+
TTGGATCG
+
TGCCATCG
+
TTAGTCCG
+
TACAGCCG
+
ACTCACCG
+
TCGGTACG
+
ATTCGACG
+
GTTGCACG
+
ATCCCACG
+
TGTACACG
+
AACACACG
+
AGGCAACG
+
ACGAAACG
+
GGCGTTAG
+
TCCCGTAG
+
TAGTCTAG
+
CGTGCTAG
+
CCTACTAG
+
TGTTTGAG
+
GATGTGAG
+
TTTGGGAG
+
TGGAGGAG
+
TCACCGAG
+
CTATAGAG
+
ACGCAGAG
+
CCCTTCAG
+
ACGCTCAG
+
CATCGCAG
+
TCTAGCAG
+
TGTTCCAG
+
ATACCCAG
+
TGCGACAG
+
GGTCACAG
+
TTTAACAG
+
CACAACAG
+
GGAAACAG
+
GGCCTAAG
+
ACACTAAG
+
CGTAGAAG
+
GGATAAAG
+
AGTGAAAG
+
GTCCAAAG
+
TGTCTTTC
+
CGTATTTC
+
ATATCTTC
+
TGGGATTC
+
GCGCATTC
+
TTTGTGTC
+
CAGGTGTC
+
CGCTAGTC
+
GGTTTCTC
+
TTCCGCTC
+
CACTCCTC
+
TGACCCTC
+
GTACACTC
+
TGCGTATC
+
TCTGCATC
+
TAACCATC
+
GCCACATC
+
CTTTAATC
+
AAGTAATC
+
TCCCAATC
+
GGGAAATC
+
CAGTTTGC
+
CTGAGTGC
+
AGTGATGC
+
CTCGATGC
+
GCTTTGGC
+
ATGTTGGC
+
TACCAGGC
+
CACAAGGC
+
ATCAGCGC
+
GTTACCGC
+
GAATACGC
+
TTGCACGC
+
AACTTAGC
+
ACGGTAGC
+
CCCATAGC
+
CTACGAGC
+
GGAGAAGC
+
TTCGTTCC
+
GGACTTCC
+
TCCAGTCC
+
AGAAGTCC
+
AAACCTCC
+
CTTACTCC
+
AACAATCC
+
ACCTTGCC
+
GAAGTGCC
+
ATTGGGCC
+
TTGTCGCC
+
TTATAGCC
+
GCAAAGCC
+
CATCTCCC
+
GTAATCCC
+
TGATGCCC
+
AATGACCC
+
CTAGACCC
+
GATTTACC
+
TGGCTACC
+
TTAGGACC
+
GAAAGACC
+
TCGACACC
+
GTGTAACC
+
CCCTAACC
+
TCTCAACC
+
TTGTTTAC
+
CGGCTTAC
+
CAGATTAC
+
AAGCGTAC
+
GTCCGTAC
+
ACGTATAC
+
GTCAATAC
+
CTCTTGAC
+
GGTCTGAC
+
AACCTGAC
+
TAGTGGAC
+
TGACGGAC
+
GCAAGGAC
+
GATTAGAC
+
TTCCAGAC
+
AGGAAGAC
+
GAGTTCAC
+
TGCCTCAC
+
TTTATCAC
+
ATGGGCAC
+
CTTCGCAC
+
AGCACCAC
+
GGTGACAC
+
CCTGACAC
+
CTAGTAAC
+
AGCAGAAC
+
CGGACAAC
+
TCGGTTTA
+
AGAAGTTA
+
GGCCCTTA
+
ATGGATTA
+
CCACATTA
+
GCAGGGTA
+
GAGCGGTA
+
CTTAGGTA
+
GGGAGGTA
+
CTCGCGTA
+
CGAACGTA
+
ATTCAGTA
+
TTGATCTA
+
TGTGGCTA
+
ATCCGCTA
+
AAAGCCTA
+
CGTACCTA
+
GGCTACTA
+
AGAGACTA
+
CGTGGATA
+
GACAGATA
+
TTCACATA
+
CGCTAATA
+
CCATTTGA
+
CGCCTTGA
+
GAGGCTGA
+
TGGTATGA
+
AGCTATGA
+
TGAAATGA
+
CTTCTGGA
+
TCCAGGGA
+
GTGTCGGA
+
ACAGCGGA
+
ATATAGGA
+
GCAGTCGA
+
AAACTCGA
+
GATTGCGA
+
ATGACCGA
+
ACCCACGA
+
GGGAACGA
+
AGTTTAGA
+
GGAATAGA
+
AAATCAGA
+
GTCAAAGA
+
CCTATTCA
+
AGGATTCA
+
CGACGTCA
+
CGCTCTCA
+
TGTGCTCA
+
CTGGTGCA
+
TACCGGCA
+
TAGTCGCA
+
CGTCAGCA
+
ATGAAGCA
+
CCCAAGCA
+
GCTTTCCA
+
TCCGTCCA
+
ACTAGCCA
+
AATTCCCA
+
AGACACCA
+
GTTAACCA
+
TGATAACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/smartseq.cellids.txt	Fri Jan 15 17:39:11 2021 +0000
@@ -0,0 +1,9 @@
+MGH26_A01
+MGH26_A02
+MGH26_A03
+CSC6_D01
+CSC6_D02
+CSC6_D03
+CSC8_H01
+CSC8_H02
+CSC8_H03
Binary file test-data/smartseq1.R1.fastq.gz has changed
Binary file test-data/smartseq1.R2.fastq.gz has changed
Binary file test-data/smartseq2.R1.fastq.gz has changed
Binary file test-data/smartseq2.R2.fastq.gz has changed
Binary file test-data/smartseq3.R1.fastq.gz has changed
Binary file test-data/smartseq3.R2.fastq.gz has changed
Binary file test-data/smartseq4.R1.fastq.gz has changed
Binary file test-data/smartseq4.R2.fastq.gz has changed
Binary file test-data/smartseq5.R1.fastq.gz has changed
Binary file test-data/smartseq5.R2.fastq.gz has changed
Binary file test-data/smartseq6.R1.fastq.gz has changed
Binary file test-data/smartseq6.R2.fastq.gz has changed
Binary file test-data/smartseq7.R1.fastq.gz has changed
Binary file test-data/smartseq7.R2.fastq.gz has changed
Binary file test-data/smartseq8.R1.fastq.gz has changed
Binary file test-data/smartseq8.R2.fastq.gz has changed
Binary file test-data/smartseq9.R1.fastq.gz has changed
Binary file test-data/smartseq9.R2.fastq.gz has changed