diff rg_rnaStarSolo.xml @ 13:9ee34ba73ebf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit ae6b59a8e52fd34e2347d1fd8d34129c36779266
author iuc
date Fri, 17 Feb 2023 20:04:43 +0000
parents 79b885ce78d7
children 1cd2511a396e
line wrap: on
line diff
--- a/rg_rnaStarSolo.xml	Tue Nov 01 16:57:42 2022 +0000
+++ b/rg_rnaStarSolo.xml	Fri Feb 17 20:04:43 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@" profile="20.01" license="MIT">
+<tool id="rna_starsolo" name="RNA STARSolo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
     <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>
     <macros>
         <import>macros.xml</import>
@@ -22,16 +22,20 @@
 
     #if str($sc.solo_type) == "CB_UMI_Simple":
     @READSHANDLING@
-    --soloCBwhitelist '$sc.soloCBwhitelist'
+    #if $sc.soloCBwhitelist:
+        --soloCBwhitelist '$sc.soloCBwhitelist'
+    #else
+        --soloCBwhitelist None
+    #end if
     ## 1 - check length of barcode, 0 - do not check
     ## Good for checking custom chemistries
     --soloBarcodeReadLength $sc.soloBarcodeReadLength
-    #if str($sc.params.chemistry) == "CR2":
+    #if str($sc.params.chemistry) == "Cv2":
     --soloCBstart 1
     --soloCBlen 16
     --soloUMIstart 17
     --soloUMIlen 10
-    #else if str($sc.params.chemistry) == "CR3":
+    #else if str($sc.params.chemistry) == "Cv3":
     --soloCBstart 1
     --soloCBlen 16
     --soloUMIstart 17
@@ -90,12 +94,20 @@
     --soloCBwhitelist None
     #end if
 
-    --soloUMIfiltering $solo.soloUMIfiltering
     --soloStrand $solo.soloStrand
     --soloFeatures $solo.soloFeatures
-    --soloUMIdedup $sc.soloUMIdedup
-    --quantMode TranscriptomeSAM
-    --outSAMtype BAM Unsorted
+    --soloUMIdedup $sc.umidedup.soloUMIdedup
+    #if str($sc.umidedup.soloUMIdedup) == "1MM_CR":
+        --soloUMIfiltering $sc.umidedup.soloUMIfiltering
+    #end if
+    --quantMode TranscriptomeSAM $solo.quantModeGene
+    #set $tag_names = str($solo.outSAMattributes).replace(',', ' ')
+    --outSAMattributes $tag_names
+    #if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
+        --outSAMtype BAM SortedByCoordinate
+    #else:
+        --outSAMtype BAM Unsorted
+    #end if
 
     #if str($solo.filter.filter_type) == "cellranger2":
     --soloCellFilter CellRanger2.2 $solo.filter.n_expected $solo.filter.max_perc $solo.filter.max_min_ratio
@@ -109,24 +121,39 @@
     ## Splice junctions are always under "raw" directory
 
     --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}'
+
+    ## Limits
+    @LIMITS@
+
+    ##outWig:
+    @OUTWIG@
     ## Rename the selected features directory
     && mv Solo.out/${solo.soloFeatures} Solo.out/soloFeatures
     ## put the barcodes and features stats into a single file
     && cat <(echo "Barcodes:") Solo.out/Barcodes.stats <(echo "Genes:") Solo.out/soloFeatures/Features.stats > '${output_stats}'
 
-    ## BAM sorting (logic copied from samtools_sort wrapper)
-    ## choosing BAM SortedByCoord appeared once to give fewer reads
-    ## than BAM Unsorted followed by a samtools sort
-    ## so better go with the latter?
+
+    #if "CB" in $tag_names or "UB" in $tag_names or str($outWig.outWigType) != 'None':
+        ## recompress BAM output for smaller file size
+        && samtools view -b -o '$output_BAM' Aligned.sortedByCoord.out.bam
+    #else:
+        ## BAM sorting (logic copied from samtools_sort wrapper)
+        ## choosing BAM SortedByCoord appeared once to give fewer reads
+        ## than BAM Unsorted followed by a samtools sort
+        ## so better go with the latter?
 
-    &&
-    ##compute the number of ADDITIONAL threads to be used by samtools (-@)
-    addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
-    ##compute the number of memory available to samtools sort (-m)
-    ##use only 75% of available: https://github.com/samtools/samtools/issues/831
-    addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
-    ((addmemory=addmemory*75/100)) &&
-    samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
+        &&
+        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
+        addthreads=\${GALAXY_SLOTS:-2} && (( addthreads-- )) &&
+        ##compute the number of memory available to samtools sort (-m)
+        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        ((addmemory=addmemory*75/100)) &&
+        samtools sort -@ \$addthreads -m \$addmemory"M" -T "\${TMPDIR:-.}" -O bam -o '$output_BAM' Aligned.out.bam
+    #end if
+    ##outWig:
+    @OUTWIGOUTPUTS@
+
     ]]></command>
     <configfiles>
         <configfile name="manifest_file" >
@@ -170,13 +197,13 @@
                     </when>
                     <when value="without-gtf">
                         <expand macro="index_selection" with_gene_model="0" />
-                        <expand macro="@SJDBOPTIONS@" optional="false" />
+                        <expand macro="SJDBOPTIONS"/>
                     </when>
                 </conditional>
             </when>
             <when value="history">
                 <expand macro="ref_selection" />
-                <expand macro="@SJDBOPTIONS@" optional="false"/>
+                <expand macro="SJDBOPTIONS"/>
             </when>
         </conditional>
         <conditional name="sc" >
@@ -187,15 +214,15 @@
             </param>
             <when value="CB_UMI_Simple">
                 <expand macro="input_selection" />
-                <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
+                <param format="txt,tsv" argument="--soloCBwhitelist" optional="True" type="data" label="RNA-Seq Cell Barcode Whitelist"/>
                 <conditional name="params" >
                     <param name="chemistry" type="select" label="Configure Chemistry Options">
-                        <option value="CR2" selected="true">Cell Ranger v2</option>
-                        <option value="CR3">Cell Ranger v3</option>
+                        <option value="Cv2" selected="true">Chromium chemistry v2</option>
+                        <option value="Cv3">Chromium chemistry v3</option>
                         <option value="custom">Custom</option>
                     </param>
-                    <when value="CR2" />
-                    <when value="CR3" />
+                    <when value="Cv2" />
+                    <when value="Cv3" />
                     <when value="custom" >
                         <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
                         <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
@@ -219,11 +246,25 @@
                     </when>
                 </conditional>
                 <param argument="--soloBarcodeReadLength" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Barcode Size is same size of the Read" help="Disable this if your R1 barcodes contain poly-T bases after the barcode sequence." />
-                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
-                    <expand macro="umidedup_options" />
-                    <option value="Exact" >Exact</option>
-                    <option value="1MM_CR" >CellRanger2-4 algorithm</option>
-                </param>
+                <conditional name="umidedup">
+                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                        <expand macro="umidedup_options" />
+                        <option value="Exact" >Exact</option>
+                        <option value="1MM_CR" >CellRanger2-4 algorithm</option>
+                    </param>
+                    <when value="1MM_All"/>
+                    <when value="1MM_Directional_UMItools"/>
+                    <when value="1MM_Directional"/>
+                    <when value="Exact"/>
+                    <when value="1MM_CR">
+                        <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
+                            <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
+                            <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
+                            <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
+                            <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
+                        </param>                 
+                    </when>
+                </conditional>
                 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
     CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.">
                     <expand macro="cb_match_wl_common" />
@@ -252,23 +293,46 @@
                 </param>
                 <param name="umi_end_anchor_pos" type="integer" value="0" label="0-based position of the UMI end with respect to the anchor base" />
                 <expand macro="solo_adapter_params" />
-                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
-                    <expand macro="umidedup_options" />
-                    <option value="Exact" >Exact</option>
-                    <option value="1MM_CR" >CellRanger2-4 algorithm</option>
-                </param>
+                <conditional name="umidedup">
+                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                        <expand macro="umidedup_options" />
+                        <option value="Exact" >Exact</option>
+                        <option value="1MM_CR" >CellRanger2-4 algorithm</option>
+                    </param>
+                    <when value="1MM_All"/>
+                    <when value="1MM_Directional_UMItools"/>
+                    <when value="1MM_Directional"/>
+                    <when value="Exact"/>
+                    <when value="1MM_CR">
+                        <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
+                            <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
+                            <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
+                            <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
+                            <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
+                        </param>                 
+                    </when>
+                </conditional>
                 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
     CBs have to have at least one read with exact match; 1MM_multi: multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches; 1MM_multi_pseudocounts: same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.">
                     <expand macro="cb_match_wl_common" />
+                    <!-- should we add EditDist_2? -->
                 </param>
             </when>
             <when value="SmartSeq">
                 <expand macro="input_selection_smart_seq" />
                 <param name="cell_ids" format="txt,tsv" type="data" label="File containing cell IDs of the samples. One ID per line in order of samples in the above collection."/>
-                <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
-                    <option value="Exact" >Exact</option>
-                    <option value="NoDedup">Do not deduplicate UMIs</option>
-                </param>
+                <conditional name="umidedup"> <!-- SmartSeq UMI deduplication choice; every branch is empty because no option here needs extra parameters -->
+                    <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
+                        <expand macro="umidedup_options" />
+                        <option value="Exact" >Exact</option>
+                        <option value="NoDedup" >Do not deduplicate UMIs</option>
+                    </param>
+                    <when value="1MM_All"/>
+                    <when value="1MM_Directional_UMItools"/>
+                    <when value="1MM_Directional"/>
+                    <when value="Exact"/>
+                    <when value="NoDedup"/>
+                </conditional>
             </when>
         </conditional>
         <section name="solo" title="Advanced Settings" expanded="true">
@@ -281,11 +345,8 @@
                 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option>
                 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option>
                 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option>
-            </param>
-            <param argument="--soloUMIfiltering" type="select" label="Type of UMI filtering" >
-                <option value="-" selected="true">Remove UMIs with N and homopolymers (similar to CellRanger 2.2.0)</option>
-                <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
-                <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
+                <option value="GeneFull_ExonOverIntron" >Full: Count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons</option>
+                <option value="GeneFull_Ex50pAS" >Full: Count all reads overlapping genes' exons and introns: prioritize 50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.</option>
             </param>
             <conditional name="filter" >
                 <param name="filter_type" type="select" label="Cell filtering type and parameters" >
@@ -298,6 +359,7 @@
                     <param name="n_expected" type="integer" min="1" value="3000" label="Number of expected cells" />
                     <param name="max_perc" type="float" min="0" max="1" value="0.99" label="Robust maximum percentile for UMI count" />
                     <param name="max_min_ratio" type="float" min="1" value="10" label="Maximum to minimum ratio for UMI count" />
+                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                 </when>
                 <when value="emptydrops" >
                     <param name="nExpectedCells" type="integer" min="1" value="3000" label="Number of expected cells" />
@@ -310,14 +372,31 @@
                     <param name="candMaxN" type="integer" value="20000" label="Number of extra barcodes after initial cell calling" />
                     <param name="FDR" type="float" value="0.01" label="Maximum adjusted p-value for determining a barcode as non-ambient" />
                     <param name="simN" type="integer" value="10000" label="Number of log likelihood simulations" />
+                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                 </when>
                 <when value="topcells" >
                     <param name="n_cells" type="integer" min="1" value="3000" label="Number of top cells to report sorted by UMI count" />
+                    <param name="output_raw" type="boolean" checked="false" label="Output raw matrix in addition to filtered one" />
                 </when>
-                <when value="no_filter" />
+                <when value="no_filter">
+                    <param name="output_raw" type="hidden" value="true" />
+                </when>
             </conditional>
             <param argument="--soloOutFormatFeaturesGeneField3" type="text" value="Gene Expression" label="Field 3 in the Genes output." help="Input '-' to remove the 3rd column from the output." />
+            <param argument="--outSAMattributes" type="select" display="checkboxes" multiple="true" optional="true"
+            label="Read alignment tags to include in the BAM output">
+                <expand macro="common_SAM_attributes"/>
+                <option value="CR">CR Cellular barcode sequence bases (uncorrected)</option>
+                <option value="CY">CY Phred quality of the cellular barcode sequence in the CR tag</option>
+                <option value="GX">GX Gene ID</option>
+                <option value="GN">GN Gene name</option>
+                <option value="CB">CB Cell identifier (corrected)</option>
+                <option value="UB">UB UMI (corrected)</option>
+            </param>
+            <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" />
+            <expand macro="limits" />
         </section>
+        <expand macro="outWig"/>
     </inputs>
     <outputs>
         <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
@@ -333,7 +412,7 @@
         <!-- soloCellFilter set to None, if SJ is selected for soloFeatures -->
         <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes raw"
               from_work_dir="Solo.out/soloFeatures/raw/features.tsv" >
-              <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
+              <filter>solo['filter']['output_raw'] or solo['soloFeatures'] == "SJ" </filter>
         </data>
         <data format="tsv" name="output_genes_filtered" label="${tool.name} on ${on_string}: Genes filtered"
               from_work_dir="Solo.out/soloFeatures/filtered/features.tsv" >
@@ -341,7 +420,7 @@
         </data>
         <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw"
               from_work_dir="Solo.out/soloFeatures/raw/barcodes.tsv" >
-              <filter>solo['filter']['filter_type'] == "no_filter" or solo['soloFeatures'] == "SJ" </filter>
+              <filter>solo['filter']['output_raw'] or solo['soloFeatures'] == "SJ" </filter>
         </data>
         <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered"
               from_work_dir="Solo.out/soloFeatures/filtered/barcodes.tsv" >
@@ -349,7 +428,7 @@
         </data>
         <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts raw"
               from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
-            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['filter_type'] == "no_filter" </filter>
+            <filter>solo['soloFeatures'] == "Gene" and solo['filter']['output_raw'] </filter>
             <expand macro="dbKeyActions" />
         </data>
         <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Gene Counts filtered"
@@ -364,18 +443,24 @@
         </data>
         <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts raw"
               from_work_dir="Solo.out/soloFeatures/raw/matrix.mtx" >
-            <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] == "no_filter" </filter>
+            <filter>"GeneFull" in solo['soloFeatures'] and solo['filter']['output_raw'] </filter>
             <expand macro="dbKeyActions" />
         </data>
         <data format="mtx" name="output_matrixGeneFull_filtered" label="${tool.name} on ${on_string}: Matrix Full Gene Counts filtered"
               from_work_dir="Solo.out/soloFeatures/filtered/matrix.mtx" >
-            <filter>solo['soloFeatures'] == "GeneFull" and solo['filter']['filter_type'] != "no_filter" </filter>
+            <filter>"GeneFull" in solo['soloFeatures'] and solo['filter']['filter_type'] != "no_filter" </filter>
             <expand macro="dbKeyActions" />
         </data>
         <data format="bam" name="output_BAM" label="${tool.name} on ${on_string}: Alignments" >
             <expand macro="dbKeyActions" />
         </data>
         <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/>
+        <data name="reads_per_gene" format="tabular" label="${tool.name} on ${on_string}: combined reads per gene" from_work_dir="ReadsPerGene.out.tab">
+            <filter>solo['quantModeGene']</filter>
+            <expand macro="dbKeyActions" />
+            <expand macro="outCountActions" />
+        </data>
+        <expand macro="outWigOutputs"/>
     </outputs>
     <!-- Generating test data that is big enough for STARsolo to detect and small enough
          for Galaxy to test requires careful modification of input FASTA and GTF data,
@@ -386,7 +471,8 @@
          here: https://gist.github.com/mtekman/149a7c52fd73e5d8ebe49f5a27b0743d
     -->
     <tests>
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="7">
+            <!-- test 1 -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -403,9 +489,11 @@
                 </conditional>
                 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+                    <param name="chemistry" value="Cv3" />
                 </conditional>
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
             </conditional>
             <section name="solo" >
                 <conditional name="filter">
@@ -413,35 +501,47 @@
                 </conditional>
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
+                <param name="quantModeGene" value="true" />
             </section>
             <output name="output_barcodes" >
                 <assert_contents>
                     <!-- first and last line -->
                     <has_line line="AAACCTGAGCGCTCCA" />
                     <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
                 </assert_contents>
             </output>
             <output name="output_genes">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrix" >
                 <assert_contents>
                     <has_line_matching expression="14\s+394\s+7" />
                     <has_line_matching expression="4\s+381\s+1" />
+                    <has_n_lines n="10" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
                 <assert_contents>
-                    <has_line_matching expression="\s+nUnmapped\s+5823" />
-                    <has_line_matching expression="\s+nUMIs\s+8" />
+                    <has_line_matching expression="\s+noUnmapped\s+5823" />
+                    <has_line_matching expression="\s+yesUMIs\s+8" />
                 </assert_contents>
             </output>
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+            <output name="reads_per_gene" >
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" />
+                    <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" />
+                </assert_contents>
+            </output>
         </test>
-        <test expect_num_outputs="6"><!-- same as above, but using custom -->
+        <test expect_num_outputs="6">
+            <!-- test 2 -->
+            <!-- same as above, but using custom and no reads_per_gene-->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -471,31 +571,37 @@
             </section>
             <output name="output_barcodes_filtered" >
                 <assert_contents>
+                    <!-- first and last line -->
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
+                    <has_n_lines n="7" />
                 </assert_contents>
             </output>
             <output name="output_genes_filtered">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrix_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+7\s+7" />
                     <has_line_matching expression="4\s+7\s+1" />
+                    <has_n_lines n="10" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
                 <assert_contents>
-                    <has_line_matching expression="\s+nUnmapped\s+5823" />
-                    <has_line_matching expression="\s+nUMIs\s+8" />
+                    <has_line_matching expression="\s+noUnmapped\s+5823" />
+                    <has_line_matching expression="\s+yesUMIs\s+8" />
                 </assert_contents>
             </output>
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
         </test>
-        <test expect_num_outputs="6"><!-- Multiple repeats test -->
+        <test expect_num_outputs="6">
+            <!-- test 3 -->
+            <!-- Multiple repeats test -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -512,9 +618,11 @@
                 </conditional>
                 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+                    <param name="chemistry" value="Cv3" />
                 </conditional>
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
             </conditional>
             <section name="solo" >
                 <param name="soloStrand" value="Forward" />
@@ -524,11 +632,17 @@
                 <assert_contents>
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
+                    <has_n_lines n="7" />
                 </assert_contents>
             </output>
-            <!-- BAM output is huge, we don't need to test here -->
+            <output name="output_BAM" >
+                <assert_contents>
+                    <has_size value="166147" delta="600" />
+                </assert_contents>
+            </output>
         </test>
-        <test expect_num_outputs="6">
+        <test expect_num_outputs="10">
+            <!-- test 4 -->
             <!-- Test with paired collection -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -550,24 +664,35 @@
                 </conditional>
                 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+                    <param name="chemistry" value="Cv3" />
                 </conditional>
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
             </conditional>
             <section name="solo" >
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
             </section>
+            <conditional name="outWig">
+                <param name="outWigType" value="bedGraph" />
+            </conditional>
             <output name="output_barcodes_filtered" >
                 <assert_contents>
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
+                    <has_n_lines n="7" />
                 </assert_contents>
             </output>
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+            <output name="signal_unique_str1" file="Signal.Unique.str1.out.bg" />
+            <output name="signal_uniquemultiple_str1" file="Signal.UniqueMultiple.str1.out.bg" />
+            <output name="signal_unique_str2" file="Signal.Unique.str2.out.bg" />
+            <output name="signal_uniquemultiple_str2" file="Signal.UniqueMultiple.str2.out.bg" />
         </test>
-        <test expect_num_outputs="6">
-            <!-- Test soloFeatures, soloCBmatchWLtype, soloCellFilter, soloOutFormatFeaturesGeneField3, soloUMIfiltering -->
+        <test expect_num_outputs="9">
+            <!-- test 5 -->
+            <!-- Test soloFeatures, soloCBmatchWLtype, soloCellFilter, soloOutFormatFeaturesGeneField3 -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -585,17 +710,19 @@
                 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" />
                 <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+                    <param name="chemistry" value="Cv3" />
                 </conditional>
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
             </conditional>
             <section name="solo" >
-                <param name="soloUMIfiltering" value="MultiGeneUMI" />
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="GeneFull" />
                 <conditional name="filter">
                     <param name="filter_type" value="topcells" />
                     <param name="n_cells" value="5" />
+                    <param name="output_raw" value="true" />
                 </conditional>
                 <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" />
             </section>
@@ -604,22 +731,48 @@
                     <!-- first and last line -->
                     <has_line line="AGACGTTCAAGGCTCC" />
                     <has_line line="TCAACGAAGCTAGTGG" />
+                    <has_n_lines n="6" />
                 </assert_contents>
             </output>
             <output name="output_genes_filtered" >
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrixGeneFull_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+6\s+14" />
                     <has_line_matching expression="10\s+6\s+1" />
+                    <has_n_lines n="17" />
+                </assert_contents>
+            </output>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes">
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" />
+                    <has_n_lines n="14" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+394\s+195" />
+                    <has_line_matching expression="3\s+1\s+1" />
+                    <has_n_lines n="198" />
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="6">
+            <!-- test 6 -->
             <!-- Emptydrops filtering -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -637,9 +790,11 @@
                 </conditional>
                 <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
                 <conditional name="params">
-                    <param name="chemistry" value="CR3" />
+                    <param name="chemistry" value="Cv3" />
                 </conditional>
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
             </conditional>
             <section name="solo" >
                 <conditional name="filter">
@@ -663,29 +818,33 @@
                     <!-- first and last line -->
                     <has_line line="ACACCGGTCTAACGGT" />
                     <has_line line="TTCTCAATCCACGTTC" />
+                    <has_n_lines n="7" />
                 </assert_contents>
             </output>
             <output name="output_genes_filtered">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrix_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+7\s+7" />
                     <has_line_matching expression="4\s+7\s+1" />
+                    <has_n_lines n="10" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
                 <assert_contents>
-                    <has_line_matching expression="\s+nUnmapped\s+5823" />
-                    <has_line_matching expression="\s+nUMIs\s+8" />
+                    <has_line_matching expression="\s+noUnmapped\s+5823" />
+                    <has_line_matching expression="\s+yesUMIs\s+8" />
                 </assert_contents>
             </output>
             <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
         </test>
         <test expect_num_outputs="6">
+            <!-- test 7 -->
             <!-- Test soloType CB_UMI_Complex -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -722,7 +881,9 @@
                 <param name="soloAdapterSequence" value="GAGTGATTGCTTGTGACGCCTT"  />
                 <param name="soloAdapterMismatchesNmax" value="1" />
                 <param name="clipAdapterType" value="CellRanger4" />
-                <param name="soloUMIdedup" value="1MM_All" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
                 <param name="soloCBmatchWLtype" value="1MM" />
             </conditional>
             <output name="output_barcodes_filtered" >
@@ -730,28 +891,32 @@
                     <!-- first and last line -->
                     <has_line line="ACAACGTGG_AAACCTCC" />
                     <has_line line="ATTCCAGAC_TTCGCTGG" />
+                    <has_n_lines n="33" />
                 </assert_contents>
             </output>
             <output name="output_genes_filtered">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrix_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+33\s+36" />
                     <has_line_matching expression="2\s+33\s+1" />
+                    <has_n_lines n="39" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
                 <assert_contents>
-                    <has_line_matching expression="\s+nExactMatch\s+791" />
-                    <has_line_matching expression="\s+nUMIs\s+36" />
+                    <has_line_matching expression="\s+yesWLmatchExact\s+791" />
+                    <has_line_matching expression="\s+yesUMIs\s+36" />
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="6">
+            <!-- test 8 -->
             <!-- Test soloType SmartSeq -->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
@@ -824,7 +989,9 @@
                     </param>
                 </conditional>
                 <param name="cell_ids" value="smartseq.cellids.txt" />
-                <param name="soloUMIdedup" value="Exact" />
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="Exact" />
+                </conditional>
             </conditional>
             <section name="solo" >
                 <param name="soloStrand" value="Unstranded" />
@@ -837,24 +1004,152 @@
                 <assert_contents>
                     <has_line line="CSC6_D02" />
                     <not_has_text text="MGH26_A02" />
+                    <has_n_lines n="3" />
                 </assert_contents>
             </output>
             <output name="output_genes_filtered">
                 <assert_contents>
                     <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
                     <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
                 </assert_contents>
             </output>
             <output name="output_matrix_filtered" >
                 <assert_contents>
                     <has_line_matching expression="14\s+3\s+10" />
                     <has_line_matching expression="12\s+3\s+1" />
+                    <has_n_lines n="13" />
                 </assert_contents>
             </output>
             <output name="output_stats" >
                 <assert_contents>
-                    <has_line_matching expression="\s+nExactMatch\s+9000" />
-                    <has_line_matching expression="\s+nUMIs\s+32" />
+                    <has_line_matching expression="\s+yesWLmatchExact\s+9000" />
+                    <has_line_matching expression="\s+yesUMIs\s+32" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="6">
+            <!-- test 9 -->
+            <!-- Test outSAMattributes set to NH,HI,AS,nM,GX,GN,CB,UB with filter_type no_filter; output_BAM is checked by size only -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <conditional name="params">
+                    <param name="chemistry" value="Cv3" />
+                </conditional>
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="Gene" />
+                <param name="outSAMattributes" value="NH,HI,AS,nM,GX,GN,CB,UB" />
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes">
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+394\s+7" />
+                    <has_line_matching expression="4\s+381\s+1" />
+                    <has_n_lines n="10" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+noUnmapped\s+5823" />
+                    <has_line_matching expression="\s+yesUMIs\s+8" />
+                </assert_contents>
+            </output>
+            <output name="output_BAM" >
+                <assert_contents>
+                    <has_size value="153108" delta="600" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="6">
+            <!-- test 10 -->
+            <!-- Test soloFeatures GeneFull_ExonOverIntron with soloUMIdedup 1MM_CR, soloUMIfiltering MultiGeneUMI and filter_type no_filter -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <param name="soloCBmatchWLtype" value="1MM_multi_pseudocounts" />
+                <conditional name="params">
+                    <param name="chemistry" value="Cv3" />
+                </conditional>
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_CR" />
+                    <param name="soloUMIfiltering" value="MultiGeneUMI" />
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="GeneFull_ExonOverIntron" />
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
+                <param name="soloOutFormatFeaturesGeneField3" value="Dummy Text" />
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes" >
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Dummy\s+Text" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Dummy\s+Text" />
+                    <has_n_lines n="14" />
+                </assert_contents>
+            </output>
+            <output name="output_matrixGeneFull" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+394\s+104" />
+                    <has_line_matching expression="10\s+2\s+1" />
+                    <has_n_lines n="107" />
                 </assert_contents>
             </output>
         </test>