Mercurial > repos > iuc > rna_starsolo
changeset 14:1cd2511a396e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 904cd12820a09a8e7ce7d01c64fa22f1ed93ed17
author | iuc |
---|---|
date | Wed, 22 Feb 2023 18:01:29 +0000 |
parents | 9ee34ba73ebf |
children | b8f5f6e87f5c |
files | macros.xml rg_rnaStarSolo.xml |
diffstat | 2 files changed, 128 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Fri Feb 17 20:04:43 2023 +0000 +++ b/macros.xml Wed Feb 22 18:01:29 2023 +0000 @@ -5,7 +5,7 @@ the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ --> <!-- STAR version to be used --> <token name="@TOOL_VERSION@">2.7.10b</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">21.01</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected @@ -64,23 +64,26 @@ </xml> <xml name="dbKeyActions"> <actions> - <conditional name="refGenomeSource.geneSource"> - <when value="indexed"> - <action type="metadata" name="dbkey"> - <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0"> - <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> - <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/> - </option> - </action> - </when> - <when value="history"> - <action type="metadata" name="dbkey"> - <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" /> - </action> - </when> - </conditional> + <expand macro="dbKeyAction"/> </actions> </xml> + <xml name="dbKeyAction"> + <conditional name="refGenomeSource.geneSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="@IDX_DATA_TABLE@" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.GTFconditional.genomeDir" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </xml> <token name="@TEMPINDEX@"><![CDATA[ ## Create temporary index for custom reference #if 
str($refGenomeSource.geneSource) == 'history': @@ -219,7 +222,7 @@ </conditional> </xml> <xml name="umidedup_options"> - <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other</option> + <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other (1MM_All)</option> <option value="1MM_Directional_UMItools" >Directional method from the UMI-tool</option> <option value="1MM_Directional" >Directional with stringent UMI deduplication</option> </xml> @@ -231,12 +234,12 @@ </xml> <xml name="cb_match_wl_common"> <option value="Exact" >Exact</option> - <option value="1MM" >Single match</option> + <option value="1MM" >Single match (1MM)</option> </xml> <xml name="cb_match_wl_cellranger"> - <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2)</option> - <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3)</option> - <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3)</option> + <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2, 1MM_multi)</option> + <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3, 1MM_multi_pseudocounts)</option> + <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3, 1MM_multi_Nbase_pseudocounts)</option> </xml> <xml name="solo_adapter_params"> <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." 
> @@ -278,6 +281,7 @@ <xml name="outCountActions"> <actions> <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> + <expand macro="dbKeyAction"/> </actions> </xml> <xml name="outWig"> @@ -397,4 +401,13 @@ <when value="-" /> </conditional> </xml> + <xml name="outSAMmapqUnique"> + <!-- MAPQ 255 is the default in STAR (coming from tophat behaviour and compatibility for Cufflinks) but it is a problematic value + - according to SAM/BAM specs it means "undefined". + - Using 255 as the max mapq causes problems with modern downstream tools like mutect2: https://sites.duke.edu/workblog/2021/08/18/star-rnaseq-gatk-mutect2/ and 60 has become an unofficial replacement for 255. --> + <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255" + label="MAPQ value for unique mappers" + help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is +used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflinks (default in STAR) but keep to 60 for modern downstream tools like mutect2." /> + </xml> </macros>
--- a/rg_rnaStarSolo.xml Fri Feb 17 20:04:43 2023 +0000 +++ b/rg_rnaStarSolo.xml Wed Feb 22 18:01:29 2023 +0000 @@ -122,6 +122,10 @@ --soloOutFormatFeaturesGeneField3 '${solo.soloOutFormatFeaturesGeneField3}' + ## Unmapped + '$solo.outSAMunmapped' + ## Read MAPQ + --outSAMmapqUnique ${solo.outSAMmapqUnique} ## Limits @LIMITS@ @@ -189,13 +193,13 @@ <param name="GTFselect" type="select" label="Reference genome with annotation" help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and provide your own splice junction annotations."> - <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option> + <option value="without-gtf-with-gtf" selected='true'>use genome reference without builtin gene-model</option> <option value="with-gtf">use genome reference with builtin gene-model</option> </param> <when value="with-gtf"> <expand macro="index_selection" with_gene_model="1" /> </when> - <when value="without-gtf"> + <when value="without-gtf-with-gtf"> <expand macro="index_selection" with_gene_model="0" /> <expand macro="SJDBOPTIONS"/> </when> @@ -325,7 +329,7 @@ <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, Exact collapses only exactly matching UMIs."> <expand macro="umidedup_options" /> <option value="Exact" >Exact</option> - <option value="NoDedup" >CellRanger2-4 algorithm</option> + <option value="NoDedup" >Do not deduplicate UMIs</option> </param> <when value="1MM_All"/> <when value="1MM_Directional_UMItools"/> @@ -388,12 +392,19 @@ <expand macro="common_SAM_attributes"/> <option value="CR">CR Cellular barcode sequence bases 
(uncorrected)</option> <option value="CY">CY Phred quality of the cellular barcode sequence in the CR tag</option> + <option value="UR">UR UMI (uncorrected)</option> + <option value="UY">UY Phred quality of the UMI</option> <option value="GX">GX Gene ID</option> <option value="GN">GN Gene name</option> <option value="CB">CB Cell identifier (corrected)</option> <option value="UB">UB UMI (corrected)</option> + <option value="sM">sM assessment of CB and UMI</option> + <option value="sS">sS sequence of the entire barcode (CB,UMI,adapter...)</option> + <option value="sQ">quality of the entire barcode</option> </param> <param name="quantModeGene" type="boolean" truevalue="GeneCounts" falsevalue="" checked="false" label="Output global gene count" help="Can be used by MultiQC" /> + <param argument="--outSAMunmapped" type="boolean" truevalue="--outSAMunmapped Within" falsevalue="--outSAMunmapped None" checked="false" label="Output unmapped reads in the BAM" /> + <expand macro="outSAMmapqUnique"/> <expand macro="limits" /> </section> <expand macro="outWig"/> @@ -457,7 +468,6 @@ <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Barcode/Feature Statistic Summaries"/> <data name="reads_per_gene" format="tabular" label="${tool.name} on ${on_string}: combined reads per gene" from_work_dir="ReadsPerGene.out.tab"> <filter>solo['quantModeGene']</filter> - <expand macro="dbKeyActions" /> <expand macro="outCountActions" /> </data> <expand macro="outWigOutputs"/> @@ -537,11 +547,12 @@ <has_line_matching expression="ENSG00000279493\s+0\s+0\s+0" /> <has_line_matching expression="ENSG00000275464\s+38\s+1\s+40" /> </assert_contents> + <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> </output> </test> <test expect_num_outputs="6"> <!-- test 2 --> - <!-- same as above, but using custom and no reads_per_gene--> + <!-- same as above, but using custom, no reads_per_gene and include unmapped reads--> <conditional 
name="refGenomeSource"> <param name="geneSource" value="history" /> <param name="genomeFastaFiles" value="filtered3.Homo_sapiens.GRCh38.dna.chromosome.21.fa.gz" /> @@ -568,6 +579,7 @@ <section name="solo" > <param name="soloStrand" value="Forward" /> <param name="soloFeatures" value="Gene" /> + <param name="outSAMunmapped" value="true" /> </section> <output name="output_barcodes_filtered" > <assert_contents> @@ -597,7 +609,11 @@ <has_line_matching expression="\s+yesUMIs\s+8" /> </assert_contents> </output> - <output name="output_BAM" value="filtered3.bam" compare="sim_size" delta="600" /> + <output name="output_BAM"> + <assert_contents> + <has_size value="884669" delta="80000" /> + </assert_contents> + </output> </test> <test expect_num_outputs="6"> <!-- test 3 --> @@ -1153,6 +1169,78 @@ </assert_contents> </output> </test> + <test expect_num_outputs="7"> + <!-- test 11 indexed --> + <conditional name="refGenomeSource"> + <param name="geneSource" value="indexed" /> + <conditional name="GTFconditional"> + <param name="GTFselect" value="without-gtf-with-gtf" /> + <param name="genomeDir" value="000" /> + <param name="sjdbOverhang" value="75"/> + <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/> + </conditional> + </conditional> + <conditional name="sc" > + <param name="solo_type" value="CB_UMI_Simple" /> + <conditional name="input_types"> + <param name="use" value="repeat" /> + <param name="input1" value="pbmc_1k_v2_L001.R1.10k.fastq.gz" ftype="fastqsanger.gz" /> + <param name="input2" value="pbmc_1k_v2_L001.R2.10k.fastq.gz" ftype="fastqsanger.gz" /> + </conditional> + <param name="soloCBwhitelist" value="filtered.barcodes.txt" /> + <conditional name="params"> + <param name="chemistry" value="Cv3" /> + </conditional> + <conditional name="umidedup"> + <param name="soloUMIdedup" value="1MM_All" /> + </conditional> + </conditional> + <section name="solo" > + <conditional name="filter"> + <param name="filter_type" value="no_filter" /> + </conditional> + <param 
name="soloStrand" value="Forward" /> + <param name="soloFeatures" value="Gene" /> + <param name="quantModeGene" value="true" /> + </section> + <output name="output_barcodes" > + <assert_contents> + <!-- first and last line --> + <has_line line="AAACCTGAGCGCTCCA" /> + <has_line line="TTTGGTTAGTGGGCTA" /> + <has_n_lines n="394" /> + </assert_contents> + </output> + <output name="output_genes"> + <assert_contents> + <has_line_matching expression="GENE1\s+GENE1\s+Gene\s+Expression" /> + <has_n_lines n="1" /> + </assert_contents> + </output> + <output name="output_matrix" > + <assert_contents> + <has_line_matching expression="1\s+394\s+31" /> + <has_line_matching expression="1\s+2\s+1" /> + <has_n_lines n="34" /> + </assert_contents> + </output> + <output name="output_stats" > + <assert_contents> + <has_line_matching expression="\s+noUnmapped\s+6335" /> + <has_line_matching expression="\s+yesUMIs\s+33" /> + </assert_contents> + </output> + <output name="output_BAM"> + <assert_contents> + <has_size value="7133" delta="1000"/> + </assert_contents> + </output> + <output name="reads_per_gene" > + <assert_contents> + <has_line_matching expression="GENE1\s+41\s+41\s+0" /> + </assert_contents> + </output> + </test> </tests> <help><![CDATA[ **What it does**