Repository 'rna_starsolo'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/rna_starsolo

Changeset 22:a00cceb45700 (2025-05-31)
Previous changeset 21:381a32c51141 (2024-12-05)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit e5d0d8e40525840311dea1d212af911a6eade256
modified:
macros.xml
rg_rnaStarSolo.xml
added:
test-data/filtered4_algo_full.bam
b
diff -r 381a32c51141 -r a00cceb45700 macros.xml
--- a/macros.xml Thu Dec 05 06:50:56 2024 +0000
+++ b/macros.xml Sat May 31 19:53:27 2025 +0000
[
b'@@ -4,8 +4,8 @@\n     The data manager uses a symlink to this macro file to keep the STAR and\n     the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ -->\n     <!-- STAR version to be used -->\n-    <token name="@TOOL_VERSION@">2.7.11a</token>\n-    <token name="@VERSION_SUFFIX@">1</token>\n+    <token name="@TOOL_VERSION@">2.7.11b</token>\n+    <token name="@VERSION_SUFFIX@">0</token>\n     <token name="@PROFILE@">21.01</token>\n     <!-- STAR index version compatible with this version of STAR\n     This is the STAR version that introduced the index structure expected\n@@ -121,7 +121,7 @@\n                 #if str($refGenomeSource.diploidconditional.diploid) == \'Yes\':\n                     --genomeTransformVCF \'${refGenomeSource.diploidconditional.genomeTransformVCF}\'\n                     --genomeTransformType Diploid\n-                #end if   \n+                #end if\n             #end if\n             --runThreadN \\${GALAXY_SLOTS:-4}\n             ## in bytes\n@@ -371,6 +371,13 @@\n             </change_format>\n         </data>\n     </xml>\n+    <xml name="quantTranscriptomeSAMoutput_param">\n+        <param argument="--quantTranscriptomeSAMoutput" type="select" label="Alignment filtering for TranscriptomeSAM output">\n+            <option value="BanSingleEnd_BanIndels_ExtendSoftclip" selected="true">prohibit indels and single-end alignments, extend softclips - compatible with RSEM</option>\n+            <option value="BanSingleEnd">prohibit single-end alignments, allow indels and softclips</option>\n+            <option value="BanSingleEnd_ExtendSoftclip">prohibit single-end alignments, extend softclips, allow indels</option>\n+        </param>\n+    </xml>\n     <xml name="quantMode">\n         <conditional name="quantmode_output">\n             <param argument="--quantMode" type="select" label="Per gene/transcript output" help="STAR can provide analysis results not only with respect to the reference genome, but also with respect to genes and transcripts described by a gene model. Note: This functionality requires either the selection above of a cached index with a gene model, or a gene model provided alongside the index/reference genome in GTF or GFF3 format!">\n@@ -382,10 +389,10 @@\n             <when value="-"/>\n             <when value="GeneCounts"/>\n             <when value="TranscriptomeSAM">\n-                <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled."/>\n+                <expand macro="quantTranscriptomeSAMoutput_param"/>\n             </when>\n             <when value="TranscriptomeSAM GeneCounts">\n-                <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled."/>\n+                <expand macro="quantTranscriptomeSAMoutput_param"/>\n             </when>\n         </conditional>\n     </xml>\n@@ -432,4 +439,149 @@\n             <when value=""/>\n         </conditional>\n     </xml>\n+    <xml name="full_algo_params">\n+        <section name="seed" title="Seed parameters" expanded="false">\n+            <param argument="--seedSe'..b'ameter options\n+\n+            ## Seed parameter options\n+            --seedSearchStartLmax ${algo.params.seed.seedSearchStartLmax}\n+            --seedSearchStartLmaxOverLread ${algo.params.seed.seedSearchStartLmaxOverLread}\n+            --seedSearchLmax ${algo.params.seed.seedSearchLmax}\n+            --seedMultimapNmax ${algo.params.seed.seedMultimapNmax}\n+            --seedPerReadNmax ${algo.params.seed.seedPerReadNmax}\n+            --seedPerWindowNmax ${algo.params.seed.seedPerWindowNmax}\n+            --seedNoneLociPerWindow ${algo.params.seed.seedNoneLociPerWindow}\n+\n+            ## Alignment parameter options\n+            --alignIntronMin ${algo.params.align.alignIntronMin}\n+            --alignIntronMax ${algo.params.align.alignIntronMax}\n+            --alignMatesGapMax ${algo.params.align.alignMatesGapMax}\n+            --alignSJoverhangMin ${algo.params.align.alignSJoverhangMin}\n+            --alignSJstitchMismatchNmax ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax1} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax2} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax3} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax4}\n+            --alignSJDBoverhangMin ${algo.params.align.alignSJDBoverhangMin}\n+            --alignSplicedMateMapLmin ${algo.params.align.alignSplicedMateMapLmin}\n+            --alignSplicedMateMapLminOverLmate ${algo.params.align.alignSplicedMateMapLminOverLmate}\n+            --alignWindowsPerReadNmax ${algo.params.align.alignWindowsPerReadNmax}\n+            --alignTranscriptsPerWindowNmax ${algo.params.align.alignTranscriptsPerWindowNmax}\n+            --alignTranscriptsPerReadNmax ${algo.params.align.alignTranscriptsPerReadNmax}\n+            --alignEndsType ${algo.params.align.alignEndsType}\n+            --peOverlapNbasesMin ${algo.params.align.peOverlapNbasesMin}\n+            --peOverlapMMp ${algo.params.align.peOverlapMMp}\n+            ## Chimeric alignment parameter options\n+            #if str($chimOutType):\n+                --chimSegmentMin ${algo.params.chim_settings.chimSegmentMin}\n+                --chimScoreMin ${algo.params.chim_settings.chimScoreMin}\n+                --chimScoreDropMax $algo.params.chim_settings.chimScoreDropMax\n+                --chimScoreSeparation $algo.params.chim_settings.chimScoreSeparation\n+                --chimScoreJunctionNonGTAG $algo.params.chim_settings.chimScoreJunctionNonGTAG\n+                --chimSegmentReadGapMax $algo.params.chim_settings.chimSegmentReadGapMax\n+                --chimFilter $algo.params.chim_settings.chimFilter\n+                --chimJunctionOverhangMin $algo.params.chim_settings.chimJunctionOverhangMin\n+                --chimMainSegmentMultNmax $algo.params.chim_settings.chimMainSegmentMultNmax\n+                #if str($chimOutType) == \'Junctions\':\n+                    --chimMultimapNmax $algo.params.chim_settings.chimMultimapNmax\n+                #else:\n+                    --chimMultimapNmax 0\n+                #end if\n+                --chimMultimapScoreRange $algo.params.chim_settings.chimMultimapScoreRange\n+            #end if\n+\n+            ## Limits\n+            @LIMITS@\n+    ]]></token>\n+    <token name="@ALGO_DEFAULT@"><![CDATA[\n+            ## Go with STAR\'s default algorithmic settings,\n+            ## but we need to provide a reasonable default\n+            ## (taken from STAR-Fusion)\n+            ## for --chimSegmentMin in case the user enabled chimeric\n+            ## alignments (the STAR default is 0, which disables chimeric\n+            ## alignments). For consistency, also set\n+            ## --chimMultimapNmax to 1 when chimeric alignments are reported\n+            ## in Junctions format only.\n+            #if str($chimOutType):\n+                --chimSegmentMin 12\n+                #if str($chimOutType) == \'Junctions\':\n+                    --chimMultimapNmax 1\n+                #end if\n+            #end if\n+    ]]></token>\n+\n </macros>\n'
b
diff -r 381a32c51141 -r a00cceb45700 rg_rnaStarSolo.xml
--- a/rg_rnaStarSolo.xml Thu Dec 05 06:50:56 2024 +0000
+++ b/rg_rnaStarSolo.xml Sat May 31 19:53:27 2025 +0000
[
@@ -131,8 +131,12 @@
     $solo.outSAMunmapped
     ## Read MAPQ
     --outSAMmapqUnique ${solo.outSAMmapqUnique}
-    ## Limits
-    @LIMITS@
+
+    #if str( $algo.params.settingsType ) == 'full':
+    @ALGO_FULL@
+    #else:
+    @ALGO_DEFAULT@
+    #end if
 
     ##outWig:
     @OUTWIG@
@@ -273,7 +277,7 @@
                             <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
                             <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
                             <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
-                        </param>                 
+                        </param>
                     </when>
                 </conditional>
                 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
@@ -320,7 +324,7 @@
                             <option value="MultiGeneUMI" >Remove lower-count UMIs that map to more than one gene</option>
                             <option value="MultiGeneUMI_All" >Remove all UMIs that map to more than one gene</option>
                             <option value="MultiGeneUMI_CR" >Remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0</option>
-                        </param>                 
+                        </param>
                     </when>
                 </conditional>
                 <param argument="--soloCBmatchWLtype" type="select" label="Matching the Cell Barcodes to the WhiteList" help="Exact: only exact matches allowed; 1MM: only one match in whitelist with 1 mismatched base allowed. Allowed
@@ -414,8 +418,20 @@
             <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" />
             <param argument="--outSAMunmapped" type="boolean" truevalue="--outSAMunmapped Within" falsevalue="--outSAMunmapped None" checked="false" label="Output unmapped reads in the BAM" />
             <expand macro="outSAMmapqUnique"/>
-            <expand macro="limits" />
         </section>
+        <section name="algo" title="Algorithmic settings" expanded="true">
+            <conditional name="params">
+                <param name="settingsType" type="select" label="Configure seed, alignment and limits options">
+                    <option value="default" selected="true">Use Defaults</option>
+                    <option value="full">Extended parameter list</option>
+                </param>
+                <when value="default"/>
+                <when value="full">
+                    <expand macro="full_algo_params"/>
+                </when>
+            </conditional>
+        </section>
+        <expand macro="chim_params"/>
         <expand macro="outWig"/>
     </inputs>
     <outputs>
@@ -1381,6 +1397,93 @@
                 <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
             </output>
         </test>
+        <test expect_num_outputs="7">
+            <!-- test 14 -->
+            <conditional name="refGenomeSource">
+                <param name="geneSource" value="history" />
+                <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" />
+                <param name="genomeSAindexNbases" value="4" />
+                <param name="sjdbOverhang" value="100" />
+                <param name="sjdbGTFfile" value="filtered3.Homo_sapiens.GRCh38.100.chr21.gtf" ftype="gtf"/>
+            </conditional>
+            <conditional name="sc" >
+                <param name="solo_type" value="CB_UMI_Simple" />
+                <conditional name="input_types">
+                    <param name="use" value="repeat" />
+                    <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" />
+                    <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" />
+                </conditional>
+                <param name="soloCBwhitelist" value="filtered.barcodes.txt" />
+                <conditional name="params">
+                    <param name="chemistry" value="Cv3" />
+                </conditional>
+                <conditional name="umidedup">
+                    <param name="soloUMIdedup" value="1MM_All" />
+                </conditional>
+            </conditional>
+            <section name="solo" >
+                <conditional name="filter">
+                    <param name="filter_type" value="no_filter" />
+                </conditional>
+                <param name="soloStrand" value="Forward" />
+                <param name="soloFeatures" value="Gene" />
+                <param name="quantModeGene" value="true" />
+                <conditional name="wasp_conditional">
+                    <param name="waspOutputMode" value="wasp_mode"/>
+                    <param name="varVCFfile" value="filtered3.vcf" ftype="vcf" />
+                </conditional>
+            </section>
+            <section name="algo">
+                <conditional name="params">
+                    <param name="settingsType" value="full" />
+                    <section name="seed">
+                        <param name="seed_select" value="yes" />
+                        <param name="seedSearchStartLmax" value="25" />
+                        <param name="seedSearchStartLmax" value="25" />
+                    </section>
+                    <section name="align">
+                        <param name="alignIntronMax" value="100" />
+                        <param name="alignEndsType" value="EndToEnd" />
+                    </section>
+                </conditional>
+            </section>
+            <output name="output_barcodes" >
+                <assert_contents>
+                    <!-- first and last line -->
+                    <has_line line="AAACCTGAGCGCTCCA" />
+                    <has_line line="TTTGGTTAGTGGGCTA" />
+                    <has_n_lines n="394" />
+                </assert_contents>
+            </output>
+            <output name="output_genes">
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+FP565260\.4\s+Gene\s+Expression" />
+                    <has_line_matching expression="ENSG00000279064\s+FP236315\.1\s+Gene\s+Expression" />
+                    <has_n_lines n="14" />
+                </assert_contents>
+            </output>
+            <output name="output_matrix" >
+                <assert_contents>
+                    <has_line_matching expression="14\s+394\s+6" />
+                    <has_line_matching expression="4\s+284\s+1" />
+                    <has_n_lines n="9" />
+                </assert_contents>
+            </output>
+            <output name="output_stats" >
+                <assert_contents>
+                    <has_line_matching expression="\s+noUnmapped\s+6040" />
+                    <has_line_matching expression="\s+yesUMIs\s+6" />
+                </assert_contents>
+            </output>
+            <output name="output_BAM" value="filtered4_algo_full.bam" ftype="bam" lines_diff="6"/>
+            <output name="reads_per_gene" >
+                <assert_contents>
+                    <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" />
+                    <has_line_matching expression="ENSG00000275464\s+5\s+0\s+5" />
+                </assert_contents>
+                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" />
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**
b
diff -r 381a32c51141 -r a00cceb45700 test-data/filtered4_algo_full.bam
b
Binary file test-data/filtered4_algo_full.bam has changed