changeset 14:1cd2511a396e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 904cd12820a09a8e7ce7d01c64fa22f1ed93ed17
author iuc
date Wed, 22 Feb 2023 18:01:29 +0000
parents 9ee34ba73ebf
children b8f5f6e87f5c
files macros.xml rg_rnaStarSolo.xml
diffstat 2 files changed, 128 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Feb 17 20:04:43 2023 +0000
+++ b/macros.xml	Wed Feb 22 18:01:29 2023 +0000
@@ -5,7 +5,7 @@
     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->
     <!-- STAR version to be used -->
     <token name="@TOOL_VERSION@">2.7.10b</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">21.01</token>
     <!-- STAR index version compatible with this version of STAR
     This is the STAR version that introduced the index structure expected
@@ -64,23 +64,26 @@
     </xml>
     <xml name="dbKeyActions">
         <actions>
-            <conditional name="refGenomeSource.geneSource">
-                <when value="indexed">
-                    <action type="metadata" name="dbkey">
-                        <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0">
-                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                            <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
-                        </option>
-                    </action>
-                </when>
-                <when value="history">
-                    <action type="metadata" name="dbkey">
-                        <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" />
-                    </action>
-                </when>
-            </conditional>
+            <expand macro="dbKeyAction"/>
         </actions>
     </xml>
+    <xml name="dbKeyAction"><!-- Bare dbkey-setting conditional without an <actions> wrapper, so it can be expanded inside an existing <actions> element. For an "indexed" reference the dbkey is column 1 of the data table row whose column 0 matches genomeDir; for a "history" reference it is copied from the dbkey of the genomeFastaFiles dataset. -->
+        <conditional name="refGenomeSource.geneSource">
+            <when value="indexed">
+                <action type="metadata" name="dbkey">
+                    <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0">
+                        <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
+                        <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/>
+                    </option>
+                </action>
+            </when>
+            <when value="history">
+                <action type="metadata" name="dbkey">
+                    <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" />
+                </action>
+            </when>
+        </conditional>
+    </xml>
     <token name="@TEMPINDEX@"><![CDATA[
     ## Create temporary index for custom reference
     #if str($refGenomeSource.geneSource) == 'history':
@@ -219,7 +222,7 @@
         </conditional>
     </xml>
     <xml name="umidedup_options">
-        <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other</option>
+        <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other (1MM_All)</option>
         <option value="1MM_Directional_UMItools" >Directional method from the UMI-tool</option>
         <option value="1MM_Directional" >Directional with stringent UMI deduplication</option>
     </xml>
@@ -231,12 +234,12 @@
     </xml>
     <xml name="cb_match_wl_common">
         <option value="Exact" >Exact</option>
-        <option value="1MM" >Single match</option>
+        <option value="1MM" >Single match (1MM)</option>
     </xml>
     <xml name="cb_match_wl_cellranger">
-        <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2)</option>
-        <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3)</option>
-        <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3)</option>
+        <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2, 1MM_multi)</option>
+        <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3, 1MM_multi_pseudocounts)</option>
+        <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3, 1MM_multi_Nbase_pseudocounts)</option>
     </xml>
     <xml name="solo_adapter_params">
         <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." >
@@ -278,6 +281,7 @@
     <xml name="outCountActions">
         <actions>
             <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
+            <expand macro="dbKeyAction"/>
         </actions>
     </xml>
     <xml name="outWig">
@@ -397,4 +401,13 @@
             <when value="-" />
         </conditional>
     </xml>
+    <xml name="outSAMmapqUnique">
+        <!-- MAPQ 255 is the default in STAR (inherited from TopHat behaviour, for compatibility with Cufflinks) but it is a problematic value:
+        - according to the SAM/BAM specs it means that the mapping quality is not available.
+        - Using 255 as the max MAPQ causes problems with modern downstream tools like mutect2: https://sites.duke.edu/workblog/2021/08/18/star-rnaseq-gatk-mutect2/ and 60 has become an unofficial replacement for 255. -->
+        <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255"
+        label="MAPQ value for unique mappers"
+        help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is
+used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflinks (default in STAR) but keep at 60 for modern downstream tools like mutect2." />
+    </xml>
 </macros>
--- a/rg_rnaStarSolo.xml	Fri Feb 17 20:04:43 2023 +0000
+++ b/rg_rnaStarSolo.xml	Wed Feb 22 18:01:29 2023 +0000
@@ -122,6 +122,10 @@
 
     --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}'
 
+    ## Unmapped reads: the boolean expands to the flag plus its value (Within or None); keep it unquoted so both reach STAR as separate arguments
+    $solo.outSAMunmapped
+    ## MAPQ assigned to uniquely mapped reads
+    --outSAMmapqUnique ${solo.outSAMmapqUnique}
     ## Limits
     @LIMITS@
 
@@ -189,13 +193,13 @@
                     <param name="GTFselect" type="select"
                            label="Reference genome with annotation"
                            help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annonations.">
-                        <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option>
+                        <option value="without-gtf-with-gtf" selected='true'>use genome reference without builtin gene-model</option>
                         <option value="with-gtf">use genome reference with builtin gene-model</option>
                     </param>
                     <when value="with-gtf">
                         <expand macro="index_selection" with_gene_model="1" />
                     </when>
-                    <when value="without-gtf">
+                    <when value="without-gtf-with-gtf">
                         <expand macro="index_selection" with_gene_model="0" />
                         <expand macro="SJDBOPTIONS"/>
                     </when>
@@ -325,7 +329,7 @@
                     <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs.">
                         <expand macro="umidedup_options" />
                         <option value="Exact" >Exact</option>
-                        <option value="NoDedup" >CellRanger2-4 algorithm</option>
+                        <option value="NoDedup" >Do not deduplicate UMIs</option>
                     </param>
                     <when value="1MM_All"/>
                     <when value="1MM_Directional_UMItools"/>
@@ -388,12 +392,19 @@
                 <expand macro="common_SAM_attributes"/>
                 <option value="CR">CR Cellular barcode sequence bases (uncorrected)</option>
                 <option value="CY">CY Phred quality of the cellular barcode sequence in the CR tag</option>
+                <option value="UR">UR UMI (uncorrected)</option>
+                <option value="UY">UY Phred quality of the UMI</option>
                 <option value="GX">GX Gene ID</option>
                 <option value="GN">GN Gene name</option>
                 <option value="CB">CB Cell identifier (corrected)</option>
                 <option value="UB">UB UMI (corrected)</option>
+                <option value="sM">sM assessment of CB and UMI</option><!-- labels start with the tag name, matching the sibling options -->
+                <option value="sS">sS sequence of the entire barcode (CB,UMI,adapter...)</option>
+                <option value="sQ">sQ quality of the entire barcode</option>
             </param>
             <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" />
+            <param argument="--outSAMunmapped" type="boolean" truevalue="--outSAMunmapped Within" falsevalue="--outSAMunmapped None" checked="false" label="Output unmapped reads in the BAM" />
+            <expand macro="outSAMmapqUnique"/>
             <expand macro="limits" />
         </section>
         <expand macro="outWig"/>
@@ -457,7 +468,6 @@
         <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/>
         <data name="reads_per_gene" format="tabular" label="${tool.name} on ${on_string}: combined reads per gene" from_work_dir="ReadsPerGene.out.tab">
             <filter>solo['quantModeGene']</filter>
-            <expand macro="dbKeyActions" />
             <expand macro="outCountActions" />
         </data>
         <expand macro="outWigOutputs"/>
@@ -537,11 +547,12 @@
                     <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" />
                     <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" />
                 </assert_contents>
+                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
             </output>
         </test>
         <test expect_num_outputs="6">
             <!-- test 2 -->
-            <!-- same as above, but using custom and no reads_per_gene-->
+            <!-- same as above, but using custom, no reads_per_gene and include unmapped reads-->
             <conditional name="refGenomeSource">
                 <param name="geneSource" value="history" />
                 <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
@@ -568,6 +579,7 @@
             <section name="solo" >
                 <param name="soloStrand" value="Forward" />
                 <param name="soloFeatures" value="Gene" />
+                <param name="outSAMunmapped" value="true" />
             </section>
             <output name="output_barcodes_filtered" >
                 <assert_contents>
@@ -597,7 +609,11 @@
                     <has_line_matching expression="\s+yesUMIs\s+8" />
                 </assert_contents>
             </output>
-            <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" />
+            <output name="output_BAM">
+                <assert_contents>
+                    <has_size value="884669" delta="80000" />
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="6">
             <!-- test 3 -->
@@ -1153,6 +1169,78 @@
                 </assert_contents>
             </output>
         </test>
+        <test expect_num_outputs="7">
+            <!-- test 11: built-in ("indexed") reference without gene model, with the GTF supplied through sjdbGTFfile; quantModeGene is enabled so the reads_per_gene output is produced and checked -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="indexed" />
+                <conditional name="GTFconditional">
+                    <param name="GTFselect" value="without-gtf-with-gtf" />
+                    <param name="genomeDir" value="000" />
+                    <param name="sjdbOverhang" value="75"/>
+                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
+                </conditional>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <conditional name="params">
+                    <param name="chemistry" value="Cv3" />
+                </conditional>
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="Gene" />
+                <param name="quantModeGene" value="true" />
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes">
+                <assert_contents>
+                    <has_line_matching expression="GENE1\s+GENE1\s+Gene\s+Expression" />
+                    <has_n_lines n="1" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix" >
+                <assert_contents>
+                    <has_line_matching expression="1\s+394\s+31" />
+                    <has_line_matching expression="1\s+2\s+1" />
+                    <has_n_lines n="34" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+noUnmapped\s+6335" />
+                    <has_line_matching expression="\s+yesUMIs\s+33" />
+                </assert_contents>
+            </output>
+            <output name="output_BAM">
+                <assert_contents>
+                    <has_size value="7133" delta="1000"/>
+                </assert_contents>
+            </output>
+            <output name="reads_per_gene" >
+                <assert_contents>
+                    <has_line_matching expression="GENE1\s+41\s+41\s+0" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**