Mercurial > repos > iuc > data_manager_star_index_builder
changeset 10:a225487bf618 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit ae6b59a8e52fd34e2347d1fd8d34129c36779266
author | iuc |
---|---|
date | Fri, 17 Feb 2023 20:00:58 +0000 |
parents | c520a52b5174 |
children | d63c1442407f |
files | data_manager/macros.xml data_manager/rna_star_index_builder.xml |
diffstat | 2 files changed, 174 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/macros.xml Fri Sep 10 16:44:59 2021 +0000 +++ b/data_manager/macros.xml Fri Feb 17 20:00:58 2023 +0000 @@ -1,11 +1,12 @@ <macros> - <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager - whenever you make changes to the following two version tokens! + <!-- REMEMBER to bump the version of @IDX_VERSION_SUFFIX@ + whenever you make changes to the @TOOL_VERSION@ token! The data manager uses a symlink to this macro file to keep the STAR and - the index versions in sync, but you should manually adjust the +galaxy - version number. --> + the index versions in sync, but you should manually update @IDX_VERSION_SUFFIX@ --> <!-- STAR version to be used --> - <token name="@VERSION@">2.7.8a</token> + <token name="@TOOL_VERSION@">2.7.10b</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">21.01</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected by the current version. 
@@ -14,12 +15,14 @@ or by looking for the versionGenome parameter in source/parametersDefault of STAR's source code --> <token name="@IDX_VERSION@">2.7.4a</token> + <token name="@IDX_VERSION_SUFFIX@">1</token> <token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token> <xml name="requirements"> <requirements> - <requirement type="package" version="@VERSION@">star</requirement> - <requirement type="package" version="1.9">samtools</requirement> + <requirement type="package" version="@TOOL_VERSION@">star</requirement> + <requirement type="package" version="1.16.1">samtools</requirement> + <requirement type="package" version="1.12">gzip</requirement> <yield /> </requirements> </xml> @@ -35,7 +38,7 @@ </xml> <xml name="index_selection" token_with_gene_model="0"> - <param argument="--genomeDir" name="genomeDir" type="select" + <param argument="--genomeDir" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> <options from_data_table="@IDX_DATA_TABLE@"> @@ -55,8 +58,8 @@ <citation type="doi">10.1093/bioinformatics/bts635</citation> </citations> </xml> - <xml name="@SJDBOPTIONS@" token_optional="true"> - <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="@OPTIONAL@" help="Exon junction information for mapping splices"/> + <xml name="SJDBOPTIONS"> + <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" label="Gene model (gff3,gtf) file for splice junctions" optional="false" help="Exon junction information for mapping splices"/> <param argument="--sjdbOverhang" type="integer" min="1" value="100" label="Length of the genomic sequence around annotated junctions" help="Used in constructing the splice junctions database. 
Ideal value is ReadLength-1"/> </xml> <xml name="dbKeyActions"> @@ -81,11 +84,16 @@ <token name="@TEMPINDEX@"><![CDATA[ ## Create temporary index for custom reference #if str($refGenomeSource.geneSource) == 'history': + #if $refGenomeSource.genomeFastaFiles.ext == "fasta" + ln -s '$refGenomeSource.genomeFastaFiles' refgenome.fa && + #else + gunzip -c '$refGenomeSource.genomeFastaFiles' > refgenome.fa && + #end if mkdir -p tempstargenomedir && STAR --runMode genomeGenerate --genomeDir 'tempstargenomedir' - --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}' + --genomeFastaFiles refgenome.fa ## Handle difference between indices with/without annotations #if 'GTFconditional' in $refGenomeSource: ## GTFconditional exists only in STAR, but not STARsolo @@ -109,6 +117,8 @@ --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases} #end if --runThreadN \${GALAXY_SLOTS:-4} + ## in bytes + --limitGenomeGenerateRAM \$((\${GALAXY_MEMORY_MB:-31000} * 1000000)) && #end if ]]></token> @@ -121,17 +131,15 @@ #else: '${refGenomeSource.GTFconditional.genomeDir.fields.path}' ## Handle difference between indices with/without annotations - #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf': - #if $refGenomeSource.GTFconditional.sjdbGTFfile: - --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang - --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' - #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': - --sjdbGTFtagExonParentTranscript Parent - #end if + #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf-with-gtf': + --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang + --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' + #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3': + --sjdbGTFtagExonParentTranscript Parent #end if #end if - #end if - ]]></token> + #end if + ]]></token> <token name="@READSHANDLING@" ><![CDATA[ ## Check that the input pairs are of the same type ## otherwise 
STARsolo will run for a long time and then error out. @@ -161,8 +169,13 @@ @FASTQ_GZ_OPTION@ #end if ]]></token> + <token name="@LIMITS@" ><![CDATA[ + --limitOutSJoneRead $getVar('algo.params.junction_limits.limitOutSJoneRead', $getVar('solo.junction_limits.limitOutSJoneRead', 1000)) + --limitOutSJcollapsed $getVar('algo.params.junction_limits.limitOutSJcollapsed', $getVar('solo.junction_limits.limitOutSJcollapsed', 1000000)) + --limitSjdbInsertNsj $getVar('algo.params.junction_limits.limitSjdbInsertNsj', $getVar('solo.junction_limits.limitSjdbInsertNsj', 1000000)) + ]]></token> <xml name="ref_selection"> - <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" /> + <param argument="--genomeFastaFiles" type="data" format="fasta,fasta.gz" label="Select a reference genome" /> <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. 
For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> </xml> <xml name="stdio" > @@ -245,4 +258,143 @@ <option value="None" >No adapter clipping</option> </param> </xml> + <xml name="common_SAM_attributes"> + <option value="NH" selected="true">NH (number of reported alignments/hits for the read)</option> + <option value="HI" selected="true">HI (query hit index)</option> + <option value="AS" selected="true">AS (local alignment score)</option> + <option value="nM" selected="true">nM (number of mismatches per (paired) alignment)</option> + <option value="NM">NM (edit distance of the aligned read to the reference)</option> + <option value="MD">MD (string for mismatching positions)</option> + <option value="jM">jM (intron motifs for all junctions)</option> + <option value="jI">jI (1-based start and end of introns for all junctions)</option> + </xml> + <xml name="limits"> + <section name="junction_limits" title="Junction Limits" expanded="false"> + <param argument="--limitOutSJoneRead" type="integer" min="1" value="1000" label="Maximum number of junctions for one read (including all multimappers)" /> + <param argument="--limitOutSJcollapsed" type="integer" min="1" value="1000000" label="Maximum number of collapsed junctions" /> + <param argument="--limitSjdbInsertNsj" type="integer" min="0" value="1000000" label="Maximum number of inserts to be inserted into the genome on the fly." 
/> + </section> + </xml> + <xml name="outCountActions"> + <actions> + <action name="column_names" type="metadata" default="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand" /> + </actions> + </xml> + <xml name="outWig"> + <conditional name="outWig"> + <param name="outWigType" type="select" label="Compute coverage"> + <option value="None">No coverage</option> + <option value="bedGraph">Yes in bedgraph format</option> + <option value="wiggle">Yes in wiggle format</option> + </param> + <when value="None"> + <!-- This is necessary for the filtering of output --> + <param name="outWigStrand" type="hidden" value="false" /> + </when> + <when value="bedGraph"> + <expand macro="outWigParams"/> + </when> + <when value="wiggle"> + <expand macro="outWigParams"/> + </when> + </conditional> + </xml> + <xml name="outWigParams"> + <param name="outWigTypeSecondWord" type="select" label="Input for coverage"> + <option value="">Default (everything that mapped)</option> + <option value="read_5p">signal from only 5’ of the 1st read</option> + <option value="read2">signal from only 2nd read</option> + </param> + <param argument="--outWigStrand" type="boolean" truevalue="Stranded" falsevalue="Unstranded" checked="true" label="collapse strands (unstranded coverage)" help="By default, the strands are separated."/> + <param argument="--outWigReferencesPrefix" type="text" value="-" label="prefix matching reference name" help="For example, set 'chr' if you mapped on an ensembl genome but you want to display on UCSC"/> + <param argument="--outWigNorm" type="boolean" truevalue="RPM" falsevalue="None" checked="true" label="Normalize coverage to million of mapped reads (RPM)"/> + </xml> + <token name="@OUTWIG@"><![CDATA[ + #if str($outWig.outWigType) != 'None': + --outWigType '$outWig.outWigType' '$outWig.outWigTypeSecondWord' + --outWigStrand '$outWig.outWigStrand' + --outWigReferencesPrefix '$outWig.outWigReferencesPrefix' + --outWigNorm '$outWig.outWigNorm' + #end if + ]]></token> 
+ <token name="@OUTWIGOUTPUTS@"><![CDATA[ + #if str($outWig.outWigType) == "bedGraph": + && mv Signal.Unique.str1.out.bg Signal.Unique.str1.out + && mv Signal.UniqueMultiple.str1.out.bg Signal.UniqueMultiple.str1.out + #if str($outWig.outWigStrand) == "Stranded": + && mv Signal.Unique.str2.out.bg Signal.Unique.str2.out + && mv Signal.UniqueMultiple.str2.out.bg Signal.UniqueMultiple.str2.out + #end if + #elif str($outWig.outWigType) == "wiggle": + && mv Signal.Unique.str1.out.wig Signal.Unique.str1.out + && mv Signal.UniqueMultiple.str1.out.wig Signal.UniqueMultiple.str1.out + #if str($outWig.outWigStrand) == "Stranded": + && mv Signal.Unique.str2.out.wig Signal.Unique.str2.out + && mv Signal.UniqueMultiple.str2.out.wig Signal.UniqueMultiple.str2.out + #end if + #end if + ]]></token> + <xml name="outWigOutputs"> + <data format="bedgraph" name="signal_unique_str1" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 1" from_work_dir="Signal.Unique.str1.out"> + <filter>outWig['outWigType'] != "None"</filter> + <expand macro="dbKeyActions" /> + <change_format> + <when input="outWig.outWigType" value="wiggle" format="wig" /> + </change_format> + </data> + <data format="bedgraph" name="signal_uniquemultiple_str1" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 1" from_work_dir="Signal.UniqueMultiple.str1.out"> + <filter>outWig['outWigType'] != "None"</filter> + <expand macro="dbKeyActions" /> + <change_format> + <when input="outWig.outWigType" value="wiggle" format="wig" /> + </change_format> + </data> + <data format="bedgraph" name="signal_unique_str2" label="${tool.name} on ${on_string}: Coverage Uniquely mapped strand 2" from_work_dir="Signal.Unique.str2.out"> + <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> + <expand macro="dbKeyActions" /> + <change_format> + <when input="outWig.outWigType" value="wiggle" format="wig" /> + </change_format> + </data> + <data format="bedgraph" 
name="signal_uniquemultiple_str2" label="${tool.name} on ${on_string}: Coverage Uniquely + Multiple mapped strand 2" from_work_dir="Signal.UniqueMultiple.str2.out"> + <filter>outWig['outWigType'] != "None" and outWig['outWigStrand']</filter> + <expand macro="dbKeyActions" /> + <change_format> + <when input="outWig.outWigType" value="wiggle" format="wig" /> + </change_format> + </data> + </xml> + <xml name="quantMode"> + <conditional name="quantmode_output"> + <param argument="--quantMode" type="select" + label="Per gene/transcript output" + help="STAR can provide analysis results not only with respect to the reference genome, but also with respect to genes and transcripts described by a gene model. Note: This functionality requires either the selection above of a cached index with a gene model, or a gene model provided alongside the index/reference genome in GTF or GFF3 format!"> + <option value="-">No per gene or transcript output</option> + <option value="GeneCounts">Per gene read counts (GeneCounts)</option> + <option value="TranscriptomeSAM">Transcript-based BAM output (TranscriptomeSAM)</option> + <option value="TranscriptomeSAM GeneCounts">Both per gene read counts and transcript-based BAM output (TranscriptomeSAM GeneCounts)</option> + </param> + <when value="-" /> + <when value="GeneCounts" /> + <when value="TranscriptomeSAM"> + <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" + label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" + help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." 
/> + </when> + <when value="TranscriptomeSAM GeneCounts"> + <param argument="--quantTranscriptomeBan" type="boolean" truevalue="IndelSoftclipSingleend" falsevalue="Singleend" + label="Exclude alignments with indels or soft clipping from the transcriptome BAM output?" + help="You will need to exclude alignments with indels and soft-clipped bases from the transcriptome BAM output for compatibility with certain transcript quantification tools, most notably RSEM. If you are using a tool, like eXpress, that can deal with indels and soft-clipped bases, you can achieve better results by leaving this option disabled." /> + </when> + </conditional> + </xml> + <xml name="quantModeNoGTF"> + <conditional name="quantmode_output"> + <param argument="--quantMode" type="select" + label="Per gene/transcript output"> + <option value="-">No per gene or transcript output as no GTF was provided</option> + </param> + <when value="-" /> + </conditional> + </xml> </macros>
--- a/data_manager/rna_star_index_builder.xml Fri Sep 10 16:44:59 2021 +0000
+++ b/data_manager/rna_star_index_builder.xml Fri Feb 17 20:00:58 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="rna_star_index_builder_data_manager" name="rnastar index versioned" tool_type="manage_data" version="@IDX_VERSION@" profile="19.05">
+<tool id="rna_star_index_builder_data_manager" name="rnastar index versioned" tool_type="manage_data" version="@IDX_VERSION@+galaxy@IDX_VERSION_SUFFIX@" profile="19.05">
 <description>builder</description>
 <macros>