Mercurial > repos > iuc > rna_starsolo
diff macros.xml @ 20:45795f582ae9 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 2b3fa63863a366beef057c7f75ccbcaf0c280151
author | iuc |
---|---|
date | Tue, 27 Aug 2024 14:11:53 +0000 |
parents | 42ce70172b72 |
children |
line wrap: on
line diff
--- a/macros.xml Wed Feb 14 09:04:09 2024 +0000 +++ b/macros.xml Tue Aug 27 14:11:53 2024 +0000 @@ -5,7 +5,7 @@ the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ --> <!-- STAR version to be used --> <token name="@TOOL_VERSION@">2.7.11a</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">21.01</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected @@ -17,16 +17,14 @@ <token name="@IDX_VERSION@">2.7.4a</token> <token name="@IDX_VERSION_SUFFIX@">2</token> <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token> - <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">star</requirement> <requirement type="package" version="1.18">samtools</requirement> <requirement type="package" version="1.13">gzip</requirement> - <yield /> + <yield/> </requirements> </xml> - <xml name="edam"> <edam_topics> <edam_topic>topic_3170</edam_topic> @@ -36,20 +34,16 @@ <edam_operation>operation_0292</edam_operation> </edam_operations> </xml> - <xml name="index_selection" token_with_gene_model="0"> - <param argument="--genomeDir" type="select" - label="Select reference genome" - help="If your genome of interest is not listed, contact the Galaxy team"> + <param argument="--genomeDir" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> <options from_data_table="@IDX_DATA_TABLE@"> - <filter type="static_value" column="4" value="@WITH_GENE_MODEL@" /> - <filter type="static_value" column="5" value="@IDX_VERSION@" /> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available for the selected input dataset" /> + <filter type="static_value" column="4" value="@WITH_GENE_MODEL@"/> + <filter type="static_value" column="5" value="@IDX_VERSION@"/> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> </options> </param> </xml> - <token name="@FASTQ_GZ_OPTION@"> --readFilesCommand zcat </token> @@ -59,9 +53,9 @@ </citations> </xml> <xml name="SJDBOPTIONS"> - <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/> - <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Elements to use from the gene model to use for splice junctions" help="By default and for almost all cases: 'exon', referring to finding junctions at the RNA splice sites. This can optionally be changed to allow splicing at other levels, such as 'gene', 'transcript', 'CDS'."/> - <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> + <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/> + <param argument="--sjdbGTFfeatureExon" type="text" value="exon" label="Elements to use from the gene model to use for splice junctions" help="By default and for almost all cases: 'exon', referring to finding junctions at the RNA splice sites. This can optionally be changed to allow splicing at other levels, such as 'gene', 'transcript', 'CDS'."/> + <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/> </xml> <xml name="dbKeyActions"> <actions> @@ -80,7 +74,7 @@ </when> <when value="history"> <action type="metadata" name="dbkey"> - <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey" /> + <option type="from_param" name="refGenomeSource.genomeFastaFiles" param_attribute="dbkey"/> </action> </when> </conditional> @@ -135,7 +129,7 @@ && #end if ]]></token> - <token name="@REFGENOMEHANDLING@" ><![CDATA[ + <token name="@REFGENOMEHANDLING@"><![CDATA[ --runThreadN \${GALAXY_SLOTS:-4} --genomeLoad NoSharedMemory --genomeDir @@ -154,7 +148,7 @@ #end if #end if ]]></token> - <token name="@READSHANDLING@" ><![CDATA[ + <token name="@READSHANDLING@"><![CDATA[ ## Check that the input pairs are of the same type ## otherwise STARsolo will run for a long time and then error out. ## We consume either repeats of two inputs R1 + R2 @@ -183,59 +177,57 @@ @FASTQ_GZ_OPTION@ #end if ]]></token> - <token name="@LIMITS@" ><![CDATA[ + <token name="@LIMITS@"><![CDATA[ --limitOutSJoneRead $getVar('algo.params.junction_limits.limitOutSJoneRead', $getVar('solo.junction_limits.limitOutSJoneRead', 1000)) --limitOutSJcollapsed $getVar('algo.params.junction_limits.limitOutSJcollapsed', $getVar('solo.junction_limits.limitOutSJcollapsed', 1000000)) --limitSjdbInsertNsj $getVar('algo.params.junction_limits.limitSjdbInsertNsj', $getVar('solo.junction_limits.limitSjdbInsertNsj', 1000000)) ]]></token> <xml name="ref_selection"> - <param argument="--genomeFastaFiles" type="data" format="fasta,fasta.gz" label="Select a reference genome" /> - <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> + <param argument="--genomeFastaFiles" type="data" format="fasta,fasta.gz" label="Select a reference genome"/> + <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> </xml> - <xml name="stdio" > + <xml name="stdio"> <stdio> <regex match="FATAL error" source="both" level="fatal"/> <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/> <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/> <regex match="\[sam_read1\] missing header\? Abort!" source="both" level="fatal"/> - <yield /> + <yield/> </stdio> </xml> <xml name="input_selection"> - <conditional name="input_types" > - <param name="use" type="select" label="Input Type" > - <option value="repeat" >Separate barcode and cDNA reads</option> - <option value="list_paired" >Paired collection of barcode and cDNA reads</option> + <conditional name="input_types"> + <param name="use" type="select" label="Input Type"> + <option value="repeat">Separate barcode and cDNA reads</option> + <option value="list_paired">Paired collection of barcode and cDNA reads</option> </param> <when value="repeat"> - <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" - label="RNA-Seq FASTQ/FASTA file, Barcode reads" /> - <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" - label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> + <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/> + <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> </when> <when value="list_paired"> - <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> + <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs"/> </when> </conditional> </xml> <xml name="input_selection_smart_seq"> - <conditional name="input_types_smart_seq" > - <param name="use" type="select" label="Input Type" > - <option value="list_single_end" >Single-end FASTQ collection</option> - <option value="list_paired_end" >Paired FASTQ collection</option> + <conditional name="input_types_smart_seq"> + <param name="use" type="select" label="Input Type"> + <option value="list_single_end">Single-end FASTQ collection</option> + <option value="list_paired_end">Paired FASTQ collection</option> </param> <when value="list_single_end"> - <param name="single_end_collection" collection_type="list" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of single-end FASTQ files" /> + <param name="single_end_collection" collection_type="list" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of single-end FASTQ files"/> </when> <when value="list_paired_end"> - <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of paired-end FASTQ files" /> + <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of paired-end FASTQ files"/> </when> </conditional> </xml> <xml name="umidedup_options"> <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other (1MM_All)</option> - <option value="1MM_Directional_UMItools" >Directional method from the UMI-tool</option> - <option value="1MM_Directional" >Directional with stringent UMI deduplication</option> + <option value="1MM_Directional_UMItools">Directional method from the UMI-tool</option> + <option value="1MM_Directional">Directional with stringent UMI deduplication</option> </xml> <xml name="anchor_types"> <option value="0">Read start</option> @@ -244,16 +236,16 @@ <option value="3">Adapter end</option> </xml> <xml name="cb_match_wl_common"> - <option value="Exact" >Exact</option> - <option value="1MM" >Single match (1MM)</option> + <option value="Exact">Exact</option> + <option value="1MM">Single match (1MM)</option> </xml> <xml name="cb_match_wl_cellranger"> - <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2, 1MM_multi)</option> - <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3, 1MM_multi_pseudocounts)</option> - <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3, 1MM_multi_Nbase_pseudocounts)</option> + <option value="1MM_multi" selected="true">Multiple matches (CellRanger 2, 1MM_multi)</option> + <option value="1MM_multi_pseudocounts">Multiple matches (CellRanger 3, 1MM_multi_pseudocounts)</option> + <option value="1MM_multi_Nbase_pseudocounts">Multimatching to WL is allowed for CBs with N-bases (CellRanger 3, 1MM_multi_Nbase_pseudocounts)</option> </xml> <xml name="solo_adapter_params"> - <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." > + <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes."> <sanitizer> <valid initial="string.digits"> <add value="-"/> @@ -265,11 +257,11 @@ </valid> </sanitizer> </param> - <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence" /> - <param argument="--clipAdapterType" type="select" > - <option value="Hamming" selected="true" >Adapter clipping based on Hamming distance</option> - <option value="CellRanger4" >5p and 3p adapter clipping similar to CellRanger4</option> - <option value="None" >No adapter clipping</option> + <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence"/> + <param argument="--clipAdapterType" type="select"> + <option value="Hamming" selected="true">Adapter clipping based on Hamming distance</option> + <option value="CellRanger4">5p and 3p adapter clipping similar to CellRanger4</option> + <option value="None">No adapter clipping</option> </param> </xml> <xml name="common_SAM_attributes"> @@ -284,14 +276,14 @@ </xml> <xml name="limits"> <section name="junction_limits" title="Junction Limits" expanded="false"> - <param argument="--limitOutSJoneRead" type="integer" min="1" value="1000" label="Maximum number of junctions for one read (including all multimappers)" /> - <param argument="--limitOutSJcollapsed" type="integer" min="1" value="1000000" label="Maximum number of collapsed junctions" /> - <param argument="--limitSjdbInsertNsj" type="integer" min="0" value="1000000" label="Maximum number of inserts to be inserted into the genome on the fly." /> + <param argument="--limitOutSJoneRead" type="integer" min="1" value="1000" label="Maximum number of junctions for one read (including all multimappers)"/> + <param argument="--limitOutSJcollapsed" type="integer" min="1" value="1000000" label="Maximum number of collapsed junctions"/> + <param argument="--limitSjdbInsertNsj" type="integer" min="0" value="1000000" label="Maximum number of inserts to be inserted into the genome on the fly."/> </section> </xml> <xml name="outCountActions"> <actions> - <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> + <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand"/> <expand macro="dbKeyAction"/> </actions> </xml> @@ -304,7 +296,7 @@ </param> <when value="None"> <!-- This is necessary for the filtering of output --> - <param name="outWigStrand" type="hidden" value="false" /> + <param name="outWigStrand" type="hidden" value="false"/> </when> <when value="bedGraph"> <expand macro="outWigParams"/> @@ -352,73 +344,92 @@ <xml name="outWigOutputs"> <data format="bedgraph" name="signal_unique_str1" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 1" from_work_dir="Signal.Unique.str1.out"> <filter>outWig['outWigType'] != "None"</filter> - <expand macro="dbKeyActions" /> + <expand macro="dbKeyActions"/> <change_format> - <when input="outWig.outWigType" value="wiggle" format="wig" /> + <when input="outWig.outWigType" value="wiggle" format="wig"/> </change_format> </data> <data format="bedgraph" name="signal_uniquemultiple_str1" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 1" from_work_dir="Signal.UniqueMultiple.str1.out"> <filter>outWig['outWigType'] != "None"</filter> - <expand macro="dbKeyActions" /> + <expand macro="dbKeyActions"/> <change_format> - <when input="outWig.outWigType" value="wiggle" format="wig" /> + <when input="outWig.outWigType" value="wiggle" format="wig"/> </change_format> </data> <data format="bedgraph" name="signal_unique_str2" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 2" from_work_dir="Signal.Unique.str2.out"> <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> - <expand macro="dbKeyActions" /> + <expand macro="dbKeyActions"/> <change_format> - <when input="outWig.outWigType" value="wiggle" format="wig" /> + <when input="outWig.outWigType" value="wiggle" format="wig"/> </change_format> </data> <data format="bedgraph" name="signal_uniquemultiple_str2" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 2" from_work_dir="Signal.UniqueMultiple.str2.out"> <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> - <expand macro="dbKeyActions" /> + <expand macro="dbKeyActions"/> <change_format> - <when input="outWig.outWigType" value="wiggle" format="wig" /> + <when input="outWig.outWigType" value="wiggle" format="wig"/> </change_format> </data> </xml> <xml name="quantMode"> <conditional name="quantmode_output"> - <param argument="--quantMode" type="select" - label="Per gene/transcript output" - help="STAR can provide analysis results not only with respect to the reference genome, but also with respect to genes and transcripts described by a gene model. Note: This functionality requires either the selection above of a cached index with a gene model, or a gene model provided alongside the index/reference genome in GTF or GFF3 format!"> + <param argument="--quantMode" type="select" label="Per gene/transcript output" help="STAR can provide analysis results not only with respect to the reference genome, but also with respect to genes and transcripts described by a gene model. Note: This functionality requires either the selection above of a cached index with a gene model, or a gene model provided alongside the index/reference genome in GTF or GFF3 format!"> <option value="-">No per gene or transcript output</option> <option value="GeneCounts">Per gene read counts (GeneCounts)</option> <option value="TranscriptomeSAM">Transcript-based BAM output (TranscriptomeSAM)</option> <option value="TranscriptomeSAM GeneCounts">Both per gene read counts and transcript-based BAM output (TranscriptomeSAM GeneCounts)</option> </param> - <when value="-" /> - <when value="GeneCounts" /> + <when value="-"/> + <when value="GeneCounts"/> <when value="TranscriptomeSAM"> - <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" - label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" - help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." /> + <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled."/> </when> <when value="TranscriptomeSAM GeneCounts"> - <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" - label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" - help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." /> + <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled."/> </when> </conditional> </xml> <xml name="quantModeNoGTF"> <conditional name="quantmode_output"> - <param argument="--quantMode" type="select" - label="Per gene/transcript output"> + <param argument="--quantMode" type="select" label="Per gene/transcript output"> <option value="-">No per gene or transcript output as no GTF was provided</option> </param> - <when value="-" /> + <when value="-"/> </conditional> </xml> <xml name="outSAMmapqUnique"> <!-- MAPQ 255 is the default in STAR (coming from tophat behaviour and compatibility for Cufflinks) but it is a problematic value - according to SAM/BAM specs it means "undefined". - Using 255 as the max mapq causes problem with modern downstream tools like mutect2: https://sites.duke.edu/workblog/2021/08/18/star-rnaseq-gatk-mutect2/ and 60 has become an inofficial replacement for 255. --> - <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255" - label="MAPQ value for unique mappers" - help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is -used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflink (default in STAR) but keep to 60 for modern downstream tools like mutect2." /> + <param argument="--outSAMmapqUnique" type="integer" value="60" min="0" max="255" label="MAPQ value for unique mappers" help="STAR bases the mapping quality scores of alignment records in its BAM output on the number of alternative mappings for the read. If a read maps to multiple locations on the reference genome, the following MAPQ scoring scheme is used: >=5 mappings => MAPQ=0; 3-4 mappings => MAPQ=1; 2 mappings => MAPQ=3. This setting lets you control the MAPQ used for reads mapped to a single location. Set to 255 for compatibility with Cufflink (default in STAR) but keep to 60 for modern downstream tools like mutect2."/> + </xml> + <xml name="wasp"> + <!-- + This is re-implementation of the original WASP algorithm by Bryce van de Geijn, Graham McVicker, + Yoav Gilad and Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, + 1061–1063 (2015) https://www.nature.com/articles/nmeth.3582. WASP filtering is activated + with "waspOutputMode SAMtag". + --> + <conditional name="wasp_conditional"> + <param argument="--waspOutputMode" type="select" label="Actiavte WASP filtering"> + <help><![CDATA[This is a reimplementation of the original WASP algorithm by Bryce van de Geijn, Graham McVicker, + Yoav Gilad and Jonathan K Pritchard. https://doi.org/10.1038/nmeth.3582. This option will add the vW tag to the SAM output. vW:i:1 means + alignment passed WASP filtering, and all other values mean it did not:<br/> + - vW:i:2 = multi-mapping read<br/> + - vW:i:3 = variant base in the read is N (non-ACGT)<br/> + - vW:i:4 = remapped read did not map <br/> + - vW:i:5 = remapped read multi-maps <br/> + - vW:i:6 = remapped read maps to a different locus <br/> + - vW:i:7 = read overlaps too many variants <br/> + ]]> + </help> + <option value="" selected="true">No WASP filtering</option> + <option value="wasp_mode">Activate WASP filtering</option> + </param> + <when value="wasp_mode"> + <param argument="--varVCFfile" type="data" format="vcf" label="VCF file with personal variants" help="Each variant is expected to have a genotype with two alleles. The VCF file needs to have the 10th column with genotype recorded as 0/1, 1/0, 1/1 (or | instead of /)"/> + </when> + <when value=""/> + </conditional> </xml> </macros>