Repository 'rna_starsolo'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rna_starsolo

Changeset 14:1cd2511a396e (2023-02-22)
Previous changeset 13:9ee34ba73ebf (2023-02-17) Next changeset 15:b8f5f6e87f5c (2023-03-09)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 904cd12820a09a8e7ce7d01c64fa22f1ed93ed17
modified:
macros.xml
rg_rnaStarSolo.xml
b
diff -r 9ee34ba73ebf -r 1cd2511a396e macros.xml
--- a/macros.xml Fri Feb 17 20:04:43 2023 +0000
+++ b/macros.xml Wed Feb 22 18:01:29 2023 +0000
[
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->
     <!-- STAR version to be used -->
     <token name="@TOOL_VERSION@">2.7.10b</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">21.01</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
@@ -64,23 +64,26 @@
     </xml>
     <xml name="dbKeyActions">
         <actions>
-            <conditional name="refGenomeSource.geneSource">
-                <when value="indexed">
-                    <action type="metadata" name="dbkey">
-                        <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0">
-                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                            <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
-                        </option>
-                    </action>
-                </when>
-                <when value="history">
-                    <action type="metadata" name="dbkey">
-                        <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" />
-                    </action>
-                </when>
-            </conditional>
+            <expand macro="dbKeyAction"/>
         </actions>
     </xml>
+    <xml name="dbKeyAction">
+        <conditional name="refGenomeSource.geneSource">
+            <when value="indexed">
+                <action type="metadata" name="dbkey">
+                    <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0">
+                        <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                        <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
+                    </option>
+                </action>
+            </when>
+            <when value="history">
+                <action type="metadata" name="dbkey">
+                    <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" />
+                </action>
+            </when>
+        </conditional>
+    </xml>
     <token name="@TEMPINDEX@"><![CDATA[
     ## Create temporary index for custom reference
     #if str($refGenomeSource.geneSource) == 'history':
@@ -219,7 +222,7 @@
         </conditional>
     </xml>
     <xml name="umidedup_options">
-        <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other</option>
+        <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other (1MM_All)</option>
         <option value="1MM_Directional_UMItools" >Directional method from UMI-tools</option>
         <option value="1MM_Directional" >Directional with stringent UMI deduplication</option>
     </xml>
@@ -231,12 +234,12 @@
     </xml>
     <xml name="cb_match_wl_common">
         <option value="Exact" >Exact</option>
-        <option value="1MM" >Single match</option>
+        <option value="1MM" >Single match (1MM)</option>
     </xml>
     <xml name="cb_match_wl_cellranger">
-        <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2)</option>
-        <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3)</option>
-        <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3)</option>
+        <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2, 1MM_multi)</option>
+        <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3, 1MM_multi_pseudocounts)</option>
+        <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3, 1MM_multi_Nbase_pseudocounts)</option>
     </xml>
     <xml name="solo_adapter_params">
         <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." >
@@ -278,6 +281,7 @@
     <xml name="outCountActions">
         <actions>
             <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
+            <expand macro="dbKeyAction"/>
         </actions>
     </xml>
     <xml name="outWig">
@@ -397,4 +401,13 @@
             <when value="-" />
         </conditional>
     </xml>
+    <xml name="outSAMmapqUnique">
+        <!-- MAPQ 255 is the default in STAR (inherited from TopHat behaviour and kept for compatibility with Cufflinks), but it is a problematic value:
+        - according to the SAM/BAM specs it means "undefined".
+        - Using 255 as the maximum MAPQ causes problems with modern downstream tools like mutect2: https://sites.duke.edu/workblog/2021/08/18/star-rnaseq-gatk-mutect2/ and 60 has become an unofficial replacement for 255. -->
+        <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255"
+        label="MAPQ value for unique mappers"
+        help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is
used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflinks (default in STAR) but keep at 60 for modern downstream tools like mutect2." />
+    </xml>
 </macros>
b
diff -r 9ee34ba73ebf -r 1cd2511a396e rg_rnaStarSolo.xml
--- a/rg_rnaStarSolo.xml Fri Feb 17 20:04:43 2023 +0000
+++ b/rg_rnaStarSolo.xml Wed Feb 22 18:01:29 2023 +0000
[
b'@@ -122,6 +122,10 @@\n \n     --soloOutFormatFeaturesGeneField3 \'${solo.soloOutFormatFeaturesGeneField3}\'\n \n+    ## Unmapped\n+    \'$solo.outSAMunmapped\'\n+    ## Read MAPQ\n+    --outSAMmapqUnique ${solo.outSAMmapqUnique}\n     ## Limits\n     @LIMITS@\n \n@@ -189,13 +193,13 @@\n                     <param name="GTFselect" type="select"\n                            label="Reference genome with annotation"\n                            help="Select the \'... with builtin gene-model\' option to select from the list of available indexes that were built with splice junction information. Select the \'... without builtin gene-model\' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annonations.">\n-                        <option value="without-gtf" selected=\'true\'>use genome reference without builtin gene-model</option>\n+                        <option value="without-gtf-with-gtf" selected=\'true\'>use genome reference without builtin gene-model</option>\n                         <option value="with-gtf">use genome reference with builtin gene-model</option>\n                     </param>\n                     <when value="with-gtf">\n                         <expand macro="index_selection" with_gene_model="1" />\n                     </when>\n-                    <when value="without-gtf">\n+                    <when value="without-gtf-with-gtf">\n                         <expand macro="index_selection" with_gene_model="0" />\n                         <expand macro="SJDBOPTIONS"/>\n                     </when>\n@@ -325,7 +329,7 @@\n                     <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the \'directional\' method given in UMI-tools, Exact collapses only exactly matching UMIs.">\n                         <expand 
macro="umidedup_options" />\n                         <option value="Exact" >Exact</option>\n-                        <option value="NoDedup" >CellRanger2-4 algorithm</option>\n+                        <option value="NoDedup" >Do not deduplicate UMIs</option>\n                     </param>\n                     <when value="1MM_All"/>\n                     <when value="1MM_Directional_UMItools"/>\n@@ -388,12 +392,19 @@\n                 <expand macro="common_SAM_attributes"/>\n                 <option value="CR">CR Cellular barcode sequence bases (uncorrected)</option>\n                 <option value="CY">CY Phred quality of the cellular barcode sequence in the CR tag</option>\n+                <option value="UR">UR UMI (uncorrected)</option>\n+                <option value="UY">UY Phred quality of the UMI</option>\n                 <option value="GX">GX Gene ID</option>\n                 <option value="GN">GN Gene name</option>\n                 <option value="CB">CB Cell identifier (corrected)</option>\n                 <option value="UB">UB UMI (corrected)</option>\n+                <option value="sM">sM assessment of CB and UMI</option>\n+                <option value="sS">sS sequence of the entire barcode (CB,UMI,adapter...)</option>\n+                <option value="sQ">quality of the entire barcode</option>\n             </param>\n             <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" />\n+            <param argument="--outSAMunmapped" type="boolean" truevalue="--outSAMunmapped Within" falsevalue="--outSAMunmapped None" checked="false" label="Output unmapped reads in the BAM" />\n+            <expand macro="outSAMmapqUnique"/>\n             <expand macro="limits" />\n         </section>\n         <expand macro="outWig"/>\n@@ -457,7 +468,6 @@\n         <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature 
Statistic Summaries"/>\n         <data name="reads_per_gene" format="tabular" label="${tool.name} o'..b'utput>\n-            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />\n+            <output name="output_BAM">\n+                <assert_contents>\n+                    <has_size value="884669" delta="80000" />\n+                </assert_contents>\n+            </output>\n         </test>\n         <test expect_num_outputs="6">\n             <!-- test 3 -->\n@@ -1153,6 +1169,78 @@\n                 </assert_contents>\n             </output>\n         </test>\n+        <test expect_num_outputs="7">\n+            <!-- test 11 indexed -->\n+            <conditional name="refGenomeSource">\n+                <param name="geneSource" value="indexed" />\n+                <conditional name="GTFconditional">\n+                    <param name="GTFselect" value="without-gtf-with-gtf" />\n+                    <param name="genomeDir" value="000" />\n+                    <param name="sjdbOverhang" value="75"/>\n+                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>\n+                </conditional>\n+            </conditional>\n+            <conditional name="sc" >\n+                <param name="solo_type" value="CB_UMI_Simple" />\n+                <conditional name="input_types">\n+                    <param name="use" value="repeat" />\n+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />\n+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />\n+                </conditional>\n+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />\n+                <conditional name="params">\n+                    <param name="chemistry" value="Cv3" />\n+                </conditional>\n+                <conditional name="umidedup">\n+                    <param name="soloUMIdedup" value="1MM_All" />\n+      
          </conditional>\n+            </conditional>\n+            <section name="solo" >\n+                <conditional name="filter">\n+                    <param name="filter_type" value="no_filter" />\n+                </conditional>\n+                <param name="soloStrand" value="Forward" />\n+                <param name="soloFeatures" value="Gene" />\n+                <param name="quantModeGene" value="true" />\n+            </section>\n+            <output name="output_barcodes" >\n+                <assert_contents>\n+                    <!-- first and last line -->\n+                    <has_line line="AAACCTGAGCGCTCCA" />\n+                    <has_line line="TTTGGTTAGTGGGCTA" />\n+                    <has_n_lines n="394" />\n+                </assert_contents>\n+            </output>\n+            <output name="output_genes">\n+                <assert_contents>\n+                    <has_line_matching expression="GENE1\\s+GENE1\\s+Gene\\s+Expression" />\n+                    <has_n_lines n="1" />\n+                </assert_contents>\n+            </output>\n+            <output name="output_matrix" >\n+                <assert_contents>\n+                    <has_line_matching expression="1\\s+394\\s+31" />\n+                    <has_line_matching expression="1\\s+2\\s+1" />\n+                    <has_n_lines n="34" />\n+                </assert_contents>\n+            </output>\n+            <output name="output_stats" >\n+                <assert_contents>\n+                    <has_line_matching expression="\\s+noUnmapped\\s+6335" />\n+                    <has_line_matching expression="\\s+yesUMIs\\s+33" />\n+                </assert_contents>\n+            </output>\n+            <output name="output_BAM">\n+                <assert_contents>\n+                    <has_size value="7133" delta="1000"/>\n+                </assert_contents>\n+            </output>\n+            <output name="reads_per_gene" >\n+                <assert_contents>\n+        
            <has_line_matching expression="GENE1\\s+41\\s+41\\s+0" />\n+                </assert_contents>\n+            </output>\n+        </test>\n     </tests>\n     <help><![CDATA[\n **What it does**\n'