Mercurial > repos > iuc > samtools_consensus
changeset 0:65edd5a6002e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/samtools/samtools_consensus commit 8d2369dc1bdafc743920a155c508c20114ebe655
| author | iuc |
|---|---|
| date | Mon, 17 Nov 2025 07:30:30 +0000 |
| parents | |
| children | |
| files | macros.xml macros_tool_specific.xml samtools_consensus.xml test-data/consen1c.fa test-data/consen1c.sam test-data/ex1.bam test-data/ex1.fa test-data/example.bam test-data/test.sam test-data/toy.sam tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample |
| diffstat | 12 files changed, 1065 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Nov 17 07:30:30 2025 +0000
@@ -0,0 +1,246 @@
+<macros>
+    <xml name="requirements"> <!-- samtools requirement pinned by the TOOL_VERSION token; callers may yield extra requirements -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">samtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- NOTE: for some tools only the version of the requirement but not the
+         tool's version is controlled by the TOOL_VERSION token
+         (because their version is ahead of the requirement version ..
+         please only bump the minor version in order to let the requirement
+         version catch up eventually). To find the tools check:
+         `grep "<tool" . -r | grep -v VERSION_SUFFIX | cut -d":" -f 1` -->
+    <token name="@TOOL_VERSION@">1.22</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@PROFILE@">24.0</token>
+    <token name="@FLAGS@"><![CDATA[
+        #set $flags = 0
+        #if $filter
+            #set $flags = sum(map(int, str($filter).split(',')))
+        #end if
+    ]]></token>
+    <token name="@PREPARE_IDX@"><![CDATA[
+        ##link the input as "infile"; reuse its bam/cram index from the metadata if present, otherwise build it with samtools index
+        ln -s '$input' infile &&
+        #if $input.is_of_type('bam'):
+            #if str( $input.metadata.bam_index ) != "None":
+                ln -s '${input.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $input.is_of_type('cram'):
+            #if str( $input.metadata.cram_index ) != "None":
+                ln -s '${input.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+    ]]></token>
+    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
+        ##same as the single-input variant, but links every dataset of $input_bams under its position number
+        #for $i, $bam in enumerate( $input_bams ):
+            ln -s '$bam' '${i}' &&
+            #if $bam.is_of_type('bam'):
+                #if str( $bam.metadata.bam_index ) != "None":
+                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
+                #else:
+                    samtools index '${i}' '${i}.bai' &&
+                #end if
+            #elif $bam.is_of_type('cram'):
+                #if str( $bam.metadata.cram_index ) != "None":
+                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
+                #else:
+                    samtools index '${i}' '${i}.crai' &&
+                #end if
+            #end if
+        #end for
+    ]]></token>
+    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
+        ## Make the user-selected reference genome, if any, accessible through
+        ## a shell variable $reffa, index the reference if necessary, and make
+        ## the fai-index file available through a shell variable $reffai.
+
+        ## For a cached genome simply sets the shell variables to point to the
+        ## genome file and its precalculated index.
+        ## For a genome from the user's history, if that genome is a plain
+        ## fasta file, the code creates a symlink in the pwd, creates the fai
+        ## index file next to it, then sets the shell variables to point to the
+        ## symlink and its index.
+        ## For a fasta.gz dataset from the user's history, it tries the same,
+        ## but this will only succeed if the file got compressed with bgzip.
+        ## For a regular gzipped file samtools faidx will fail, in which case
+        ## the code falls back to decompressing to plain fasta before
+        ## reattempting the indexing.
+        ## Indexing of a bgzipped file produces a regular fai index file *and*
+        ## a compressed gzi file. The former is identical to the fai index of
+        ## the uncompressed fasta.
+
+        ## If the user has not selected a reference (it's an optional parameter
+        ## in some samtools wrappers), a cheetah boolean use_ref is set to
+        ## False to encode that fact.
+
+        #set use_ref=True
+        #if $addref_cond.addref_select == "history":
+            #if $addref_cond.ref.is_of_type('fasta'):
+                reffa="reference.fa" &&
+                ln -s '${addref_cond.ref}' \$reffa &&
+                samtools faidx \$reffa &&
+            #else:
+                reffa="reference.fa.gz" &&
+                ln -s '${addref_cond.ref}' \$reffa &&
+                {
+                    samtools faidx \$reffa ||
+                    {
+                        echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&
+                        gzip -dc \$reffa > reference.fa &&
+                        reffa="reference.fa" &&
+                        samtools faidx \$reffa;
+                    }
+                } &&
+            #end if
+            reffai=\$reffa.fai &&
+        #elif $addref_cond.addref_select == "cached":
+            ## in case of cached the absolute path is used which allows to read
+            ## a cram file without specifying the reference
+            reffa='${addref_cond.ref.fields.path}' &&
+            reffai=\$reffa.fai &&
+        #else
+            #set use_ref=False
+        #end if
+    ]]></token>
+
+    <xml name="optional_reference" token_help="" token_argument=""> <!-- reference chooser: none, from the history, or a built-in genome -->
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="no">No</option>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="no"/>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="mandatory_reference" token_help="" token_argument=""> <!-- same chooser without the "no" choice; validator sits under param, matching optional_reference -->
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+
+    <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+    <token name="@ADDMEMORY@"><![CDATA[
+        ##compute the amount of memory (MB per slot) available to samtools sort (-m)
+        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        ((addmemory=addmemory*75/100)) &&
+    ]]></token>
+    <xml name="seed_input">
+        <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+    </xml>
+
+    <!-- Include/exclude by flags + flag options -->
+    <xml name="inclusive_filter_macro" token_argument="">
+        <param name="inclusive_filter" argument="@ARGUMENT@" type="select" multiple="True" label="Require that these flags are set">
+            <expand macro="flag_options" />
+        </param>
+    </xml>
+    <xml name="exclusive_filter_macro" token_argument="">
+        <param name="exclusive_filter" argument="@ARGUMENT@" type="select" multiple="True" label="Exclude reads with any of the following flags set">
+            <expand macro="flag_options" />
+        </param>
+    </xml>
+    <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false">
+        <option value="1" selected="@S1@">Read is paired</option>
+        <option value="2" selected="@S2@">Read is mapped in a proper pair</option>
+        <option value="4" selected="@S4@">Read is unmapped</option>
+        <option value="8" selected="@S8@">Mate is unmapped</option>
+        <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option>
+        <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option>
+        <option value="64" selected="@S64@">Read is the first in a pair</option>
+        <option value="128" selected="@S128@">Read is the second in a pair</option>
+        <option value="256" selected="@S256@">Alignment of the read is not primary</option>
+        <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option>
+        <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option>
+        <option value="2048" selected="@S2048@">Alignment is supplementary</option>
+    </xml>
+
+    <!-- region specification macros and tokens for tools that allow the specification
+         of region by bed file / space separated list of regions -->
+    <token name="@REGIONS_FILE@"><![CDATA[
+        #if $cond_region.select_region == 'tab':
+            -t '$cond_region.targetregions'
+        #end if
+    ]]></token>
+    <token name="@REGIONS_MANUAL@"><![CDATA[
+        #if $cond_region.select_region == 'text':
+            #for $i, $x in enumerate($cond_region.regions_repeat):
+                '${x.region}'
+            #end for
+        #end if
+    ]]></token>
+    <xml name="regions_macro">
+        <conditional name="cond_region">
+            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
+                <option value="no" selected="True">No</option>
+                <option value="text">Manualy specify regions</option>
+                <option value="tab">Regions from tabular file</option>
+            </param>
+            <when value="no"/>
+            <when value="text">
+                <repeat name="regions_repeat" min="1" default="1" title="Regions">
+                    <param name="region" type="text" label="region" help="format chr:from-to">
+                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
+                    </param>
+                </repeat>
+            </when>
+            <when value="tab">
+                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/gigascience/giab008</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
+    </xml>
+    <xml name="stdio"> <!-- any non-zero exit code is treated as a fatal error -->
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_tool_specific.xml Mon Nov 17 07:30:30 2025 +0000
@@ -0,0 +1,65 @@
+<!-- This macros file is specific to this tool; it avoids duplicating the settings snippet shared by the "Bayesian" and "Bayesian 1.16" modes -->
+<macros>
+    <xml name="bayesian_settings_macro">
+        <section name="settings" title="Settings" expanded="no">
+            <!-- Manual vs predefined conditional: "manual" exposes every parameter, any other value only selects a predefined configuration -->
+            <conditional name="config_cond">
+                <param name="config_param" argument="--config" type="select" label="Use configuration" help="Manually set the parameters or use one of the listed pre-defined configurations">
+                    <option value="manual" selected="true">Manual</option>
+                    <option value="hiseq">hiseq</option>
+                    <option value="hifi">hifi</option>
+                    <option value="r10.4_sup">r10.4_sup</option>
+                    <option value="r10.4_dup">r10.4_dup</option>
+                    <option value="ultima">ultima</option>
+                </param>
+                <!-- set parameters manually -->
+                <when value="manual">
+                    <param argument="-C" type="integer" value="10" min="0" max="93" label="Cutoff threshold" help="Bases with Phred-style quality below this threshold are called as N"/>
+                    <!-- MQ settings conditional: a select whose option values are the literal command line flags (no boolean-only attributes) -->
+                    <conditional name="mq_cond">
+                        <param name="mq_param" argument="--use-MQ" type="select" label="Use MQ" help="Enable or disable the use of mapping qualities">
+                            <option value="--use-MQ" selected="true">Yes</option>
+                            <option value="--no-use-MQ">No</option>
+                        </param>
+                        <when value="--use-MQ">
+                            <param name="adj_mq" argument="--adj-MQ" type="boolean" checked="true" truevalue="--adj-MQ" falsevalue="--no-adj-MQ" label="Adjust MQ" help="Scales mapping quality based on nearby mismatches using the MD tag. Ignored if the MD tag is absent"/>
+                            <param name="mismatch_window" argument="--NM-halo" min="1" type="integer" label="Local mismatch window" value="50" help="Specifies the distance either side of the base call being considered for computing the number of local mismatches"/>
+                            <param name="low_mq" argument="--low-MQ" type="integer" label="Minimum MQ" value="1" min="0" max="60" help="Specifies a minimum value of the mapping quality. This is not a filter and instead simply put lower cap on the values"/>
+                            <param name="high_mq" argument="--high-MQ" type="integer" label="Maximum MQ" value="60" min="0" max="60" help="Specifies a maximum value of the mapping quality. This is not a filter and instead simply put upper cap on the values"/>
+                            <param name="scale_mq" argument="--scale-MQ" type="float" label="Scale MQ" value="1.0" min="0" help="This is a general multiplicative mapping quality scaling factor. The effect is to globally raise or lower the quality values used in the consensus algorithm"/>
+                        </when>
+                        <when value="--no-use-MQ"/>
+                    </conditional>
+                    <param name="p_het" argument="--P-het" type="float" label="Probability of heterozygous site" value="1.0e-03" min="0" max="1" help="Sets the prior probability of a site being heterozygous in Bayesian consensus calculations. Lower values favor homozygous calls.(output is ambiguous only if --ambig is enabled)" />
+                    <param name="p_indel" argument="--P-indel" type="float" label="Probability of indel sites" value="2.0e-04" min="0" max="1" help="Controls the likelihood of small indels. This is used in the priors for the Bayesian calculations, and has little difference on deep data" />
+                    <param name="hetero_scale" argument="--het-scale" type="float" min="0" label="Heterozygous SNP probability multiplier" value="1.0e+00" help="Multiplicative factor applied to base quality when evaluating heterozygous calls. Lower values reduce false positive heterozygous calls but may increase false negatives" />
+                    <param name="hom_fix" argument="--homopoly-fix" type="boolean" label="Homopolymer quality correction" checked="false" truevalue="-p" falsevalue="" help="Spread low-qual bases to both ends of homopolymers (Recommended for old 454 or PacBio HiFi dat)" />
+                    <param name="hom_score" argument="--homopoly-score" type="float" min="0" label="Homopolymer quality scaling" optional="true" help="Scales base qualities within homopolymer runs by this factor to reduce confidence in positions prone to sequencing errors. Automatically enables (--homopoly-fix) if not already set" />
+                    <conditional name="calibration_cond">
+                        <param name="calibration_param" argument="--qual-calibration" type="select" label="Quality calibration table" help="Choose a predefined calibration for your sequencing platform or upload a custom table to adjust base quality scores.">
+                            <option value=":hiseq">Illumina HiSeq</option>
+                            <option value=":hifi">PacBio HiFi</option>
+                            <option value=":r10.4_sup">Oxford Nanopore R10.4 (super accuracy model)</option>
+                            <option value=":r10.4_dup">Oxford Nanopore R10.4 duplex reads</option>
+                            <option value=":ultima">Ultima Genomics</option>
+                            <option value="file" selected="true">Upload a custom file with quality calibration values</option>
+                        </param>
+                        <when value=":hiseq"/>
+                        <when value=":hifi"/>
+                        <when value=":r10.4_sup"/>
+                        <when value=":r10.4_dup"/>
+                        <when value=":ultima"/>
+                        <when value="file">
+                            <param name="calibration_file" argument="--qual-calibration" type="data" format="txt" label="Quality calibration table" optional="true" help="Loads a quality calibration table from a file to adjust base quality scores in the consensus algorithm. Each line starts with QUAL and maps a reported quality value to calibrated Phred scores for substitution, undercall, and overcall errors: QUAL value substitution undercall overcall"/>
+                        </when>
+                    </conditional>
+                </when>
+                <when value="hiseq"/>
+                <when value="hifi"/>
+                <when value="r10.4_sup"/>
+                <when value="r10.4_dup"/>
+                <when value="ultima"/>
+            </conditional>
+        </section>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_consensus.xml Mon Nov 17 07:30:30 2025 +0000
@@ -0,0 +1,615 @@
+<tool id="samtools_consensus" name="Samtools consensus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>generate consensus from a SAM, BAM or CRAM file</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>macros_tool_specific.xml</import>
+        <token name="@REF_DATA@">
+            ## additional reference data: pass the prepared reference (shell variable reffa) only when one was selected
+            #if $use_ref:
+                -T "\$reffa"
+            #end if
+        </token>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+        @ADDTHREADS@
+        ## prepare reference data (shell variables reffa/reffai, Cheetah flag use_ref), then link and index the input as "infile"
+        @PREPARE_FASTA_IDX@
+        @PREPARE_IDX@
+
+        samtools consensus infile
+            -f '$format'
+            -@ \$addthreads
+
+            ##### preprocessing options
+            --min-MQ $preprocessing_options.minimum_MQ
+            --min-BQ $preprocessing_options.minimum_BQ
+
+            ### flag filters: the FLAGS macro sums the selected flag values of $filter into $flags (0 when nothing is selected)
+            #set $std_filters = ''
+            #set $filter = $preprocessing_options.inclusive_filter
+            @FLAGS@
+            #set $std_filters = $std_filters + " --rf %s" % str($flags)
+            #set $filter = $preprocessing_options.exclusive_filter
+            @FLAGS@
+            #set $std_filters = $std_filters + " --ff %s" % str($flags)
+            $std_filters
+
+            ##### consensus Options
+            -m '$consensus_options.mode'
+            ### simple options
+            #if str($consensus_options.mode) == "simple":
+                $consensus_options.settings.use_qual
+                -c $consensus_options.settings.c
+                -H $consensus_options.settings.H
+            ### bayesian options (every parameter is addressed through its full section/conditional path)
+            #else
+                ### use predefined options
+                #if str($consensus_options.settings.config_cond.config_param) != "manual":
+                    --config $consensus_options.settings.config_cond.config_param
+                ### manually set parameters; the MQ switch is always emitted so that --no-use-MQ reaches samtools
+                #else
+                    -C $consensus_options.settings.config_cond.C
+                    $consensus_options.settings.config_cond.mq_cond.mq_param
+                    #if str($consensus_options.settings.config_cond.mq_cond.mq_param) == "--use-MQ":
+                        $consensus_options.settings.config_cond.mq_cond.adj_mq
+                        --NM-halo $consensus_options.settings.config_cond.mq_cond.mismatch_window
+                        --low-MQ $consensus_options.settings.config_cond.mq_cond.low_mq
+                        --high-MQ $consensus_options.settings.config_cond.mq_cond.high_mq
+                        --scale-MQ $consensus_options.settings.config_cond.mq_cond.scale_mq
+                    #end if
+                    --P-het $consensus_options.settings.config_cond.p_het
+                    --P-indel $consensus_options.settings.config_cond.p_indel
+                    --het-scale $consensus_options.settings.config_cond.hetero_scale
+                    $consensus_options.settings.config_cond.hom_fix
+                    #if str($consensus_options.settings.config_cond.hom_score) != "":
+                        --homopoly-score $consensus_options.settings.config_cond.hom_score
+                    #end if
+                    #if str($consensus_options.settings.config_cond.calibration_cond.calibration_param) != "file":
+                        --qual-calibration '$consensus_options.settings.config_cond.calibration_cond.calibration_param'
+                    #else
+                        #if $consensus_options.settings.config_cond.calibration_cond.calibration_file:
+                            --qual-calibration '$consensus_options.settings.config_cond.calibration_cond.calibration_file'
+                        #end if
+                    #end if
+                #end if
+            #end if
+
+            ### global options
+            --min-depth $global_settings.minimum_depth
+            #for $i, $x in enumerate($global_settings.reg_repeat):
+                #set reg = '%s:%s-%s' % (str($x.reg), str($x.start), str($x.end))
+                -r "${reg}"
+            #end for
+
+
+            @REF_DATA@
+
+            ##### Output Options
+            -l $output_options.line_len
+            $output_options.all
+            --show-del $output_options.show_deletions
+            --show-ins $output_options.show_insertions
+            $output_options.ambig
+            $output_options.mark_insertions
+            > '$output_file'
+    ]]></command>
+    <!-- Parameters -->
+    <inputs>
+        <!-- Basic options -->
+        <param name="input" type="data" format="sam,bam,cram" label="BAM file"/>
+        <param argument="--format" type="select" label="Output file type" help="Produce format FMT, with 'fastq', 'fasta' and 'pileup' as permitted options.">
+            <option value="fasta" selected="true">FASTA</option>
+            <option value="fastq">FASTQ</option>
+            <option value="pileup">Pileup</option>
+        </param>
+        <!-- Preprocessing
options (reads) --> + <section name="preprocessing_options" title="Preprocessing Options" expanded="no"> + <param name="minimum_MQ" argument="--min-MQ" type="integer" value="0" min="0" label="Minimum mapping quality" help="Filters out reads with a mapping quality below specified value"/> + <param name="minimum_BQ" argument="--min-BQ" type="integer" value="0" min="0" label="Minimum base quality" help="Filters out bases with a base quality below specified value"/> + <!-- Use macro for inclusive and axclusive flags --> + <expand macro="inclusive_filter_macro" token_argument="--rf"/> + <expand macro="exclusive_filter_macro" token_argument="--ff"/> + </section> + <!-- Consensus options Condetional --> + <conditional name="consensus_options"> + <param argument="--mode" type="select" label="Select the consensus algorithm" help="Choose the consensus algorithm. 'Simple' uses base counts to call consensus, 'Bayesian' applies a probabilistic model (Gap5) for higher accuracy, and 'Bayesian 1.16' reproduces the behavior of samtools consensus v1.16 for compatibility."> + <option value="simple">Simple</option> + <option value="bayesian" selected="true">Bayesian (Gap5)</option> + <option value="bayesian_116">Bayesian 1.16</option> + </param> + <!-- simple mode options --> + <when value="simple"> + <section name="settings" title="Settings" expanded="no"> + <param name="use_qual" argument="-q" type="boolean" truevalue="-q" falsevalue="" checked="false" label="Weight by base quality" help="Base counts are weighted by their Phred quality scores instead of counting each base equally. Improves consensus accuracy by giving higher weight to more confident base calls."/> + <param argument="-c" type="float" value="0.75" min="0" max="1" label="Minimum consensus fraction" help="Require at least C fraction of bases agreeing with the most likely consensus call to emit that base type. 
Failing this check will output 'N'"/> + <param argument="-H" type="float" value="0.15" min="0" max="1" label="Heterozygous fraction threshold" help="Report a heterozygous base in the consensus if the second most frequent base occurs at least H fraction of the most common base. (Requires --ambig)"/> + </section> + </when> + <!-- bayesian mode options --> + <when value="bayesian"> + <expand macro="bayesian_settings_macro"/> + </when> + <!-- bayesian 1.16 mode options --> + <when value="bayesian_116"> + <expand macro="bayesian_settings_macro"/> + </when> + </conditional> + <!-- Gloabl Settings --> + <section name="global_settings" title="Global Settings" expanded="no"> + <param name="minimum_depth" argument="--min-depth" type="integer" value="1" label="Minimum depth" help="The minimum depth required to make a call. Failing this depth check will produce consensus 'N', or absent if it is an insertion. Note this check is performed after filtering by flags and mapping/base quality"/> + <!-- region --> + <repeat name="reg_repeat" title="region" min="0" max="1"> + <param name="reg" type="text" optional="false" label="Region"/> + <param name="start" type="integer" optional="true" label="Start"/> + <param name="end" type="integer" optional="true" label="End"/> + </repeat> + <!-- use contional ref macro --> + <expand macro="optional_reference" token_argument="--reference" token_help="Select a reference genome to guide consensus generation."/> + </section> + <!-- Output Options --> + <section name="output_options" title="Output Options" expanded="no"> + <param name="line_len" argument="-l" type="integer" value="70" label="Maximum line lenght" help="The maximum line length of line-wrapped fasta and fastq formats. 
Set to -1 for no line wrapping."/> + <param name="all" argument="-a" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Output all positions" help="Output absolutely all positions, including references with no data aligned against them"/> + <param name="show_deletions" argument="--show-del" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Show deletions" help="Whether to show deletions as '*' (if checked) or to omit from the output (if left unchecked)"/> + <param name="show_insertions" argument="--show-ins" label="Show insertions" type="boolean" truevalue="yes" falsevalue="no" checked="true" help="Whether to show insertions in the consensus"/> + <param argument="--ambig" type="boolean" truevalue="--ambig" falsevalue="" checked="false" label="IUPAC ambiguity codes" help="Enables IUPAC ambiguity codes in the consensus output. Without this the output will be limited to A, C, G, T, N and *"/> + <param name="mark_insertions" argument="--mark-ins" type="boolean" truevalue="--mark-ins" falsevalue="" checked="false" label="Mark insertions" help="Mark insertions by adding an underscore before every inserted base, plus a corresponding character in the quality for fastq format. 
When used in conjunction with '-a' and '--show-del', this permits an easy derivation of the consensus to reference coordinate mapping"/> + </section> + </inputs> + <!-- Outputs --> + <outputs> + <data name="output_file" format="fasta"> + <change_format> + <when input="format" value="fasta" format="fasta" /> + <when input="format" value="fastq" format="fastq" /> + <when input="format" value="pileup" format="pileup" /> + </change_format> + </data> + </outputs> + <!-- Tests --> + <tests> + <!-- 1) test format--> + <test expect_num_outputs="1"> + <param name="input" value="example.bam" ftype="bam" /> + <param name="format" value="fasta"/> + <section name="output_options"> + <param name="line_len" value="70"/> + </section> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression=">LR743429.1"/> + <has_n_lines n="156"/> + </assert_contents> + </output> + </test> + <!-- test pre-processing Options --> + <!-- 2) test min BQ and min MQ --> + <test expect_num_outputs="1"> + <param name="input" value="toy.sam" ftype="sam" /> + <section name="preprocessing_options"> + <param name="minimum_BQ" value="1"/> + <param name="minimum_MQ" value="21"/> + </section> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression="GGTTTTATAAAANAATTAAGTCTACAGAGCAACTA"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + </test> + <!-- 3) test include flags -rf --> + <test expect_num_outputs="1"> + <param name="input" value="ex1.bam" ftype="bam" /> + <section name="preprocessing_options"> + <param name="inclusive_filter" value="2"/> + </section> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_size size="3k" delta="1k"/> + <has_n_lines n="46"/> + </assert_contents> + </output> + </test> + <!-- test simple mode options --> + <!-- 4) test use qual--> + <test expect_num_outputs="1"> + <param name="input" value="ex1.bam" ftype="bam"/> + <conditional name="consensus_options"> + <param name="mode" 
value="simple"/> + <section name="settings"> + <param name="use_qual" value="true"/> + </section> + </conditional> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression="CAAAAATAAAGAAAAAANTTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTG"/> + </assert_contents> + </output> + </test> + <!-- 5) test minimum call fraction --> + <test expect_num_outputs="1"> + <param name="input" value="toy.sam" ftype="sam"/> + <conditional name="consensus_options"> + <param name="mode" value="simple"/> + <section name="settings"> + <param name="c" value="0.9"/> + </section> + </conditional> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression="AGGTTTTATNAAANAANTAANTCTACAGAGCAACTA"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + </test> + <!-- 6) test minimum hetro fraction --> + <test expect_num_outputs="1"> + <param name="input" value="test.sam" ftype="sam"/> + <conditional name="consensus_options"> + <param name="mode" value="simple"/> + <section name="settings"> + <param name="c" value="0.5"/> + <param name="H" value="0.1"/> + </section> + </conditional> + <section name="output_options"> + <param name="ambig" value="true"/> + </section> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression="MACGTACGTA"/> + <has_n_lines n="2"/> + </assert_contents> + </output> + </test> + <!-- 7) test cutoff --> + <test expect_num_outputs="1"> + <param name="input" value="toy.sam" ftype="sam"/> + <conditional name="consensus_options"> + <param name="mode" value="bayesian"/> + <section name="settings"> + <conditional name="config_cond"> + <param name="config_param" value="manual"/> + <param name="C" value="5"/> + </conditional> + </section> + </conditional> + <output name="output_file" ftype="fasta"> + <assert_contents> + <has_line_matching expression="TTAGATAAAGAGGATAGCTGTAGGCTCAGCGCCAT"/> + <has_n_lines n="4"/> + </assert_contents> + </output> + </test> + 
<!-- 8) Bayesian mode, manual configuration: enable use of mapping quality with default MQ settings -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <conditional name="mq_cond">
                            <param name="mq_param" value="--use-MQ"/>
                        </conditional>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNAGATAAAGAGGATAGCNNNNNNNNCAGCNNNNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 9) bayesian_116 mode: P-indel set to 0.01 -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian_116"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <param name="p_indel" value="0.01"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="CCTTTAACGAATTTCC"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 10) bayesian_116 mode: P-het set to 0.01 (together with P-indel 0.01) -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian_116"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <param name="p_indel" value="0.01"/>
                        <param name="p_het" value="0.01"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNTTTNNNGNNTTTNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 11) bayesian_116 mode: explicit low/high MQ bounds (0 and 60) -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian_116"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <conditional name="mq_cond">
                            <param name="mq_param" value="--use-MQ"/>
                            <param name="low_mq" value="0"/>
                            <param name="high_mq" value="60"/>
                        </conditional>
                        <param name="p_indel" value="0.01"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="AAAAACCAACCAAAAA"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 12) Bayesian mode: het scale set to 4 (with P-indel 0.2) -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <param name="p_indel" value="0.2"/>
                        <param name="hetero_scale" value="4"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNTTTNNNGNNTTTNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 13) Bayesian mode: homopolymer fix enabled -->
        <test expect_num_outputs="1">
            <param name="input" value="ex1.bam" ftype="bam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <param name="hom_fix" value="true"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="CAGCCTGGCTGTGGGGGNCGCAGTGGCTGAGGGGTGNAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGC"/>
                </assert_contents>
            </output>
        </test>
        <!-- 14) Bayesian mode: homopolymer score set to 0.3 -->
        <test expect_num_outputs="1">
            <param name="input" value="ex1.bam" ftype="bam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <param name="hom_score" value="0.3"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="CAGCCTGGCTGTGGGGGNCGCAGTGGCTGAGGGGTGNAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGC"/>
                </assert_contents>
            </output>
        </test>
        <!-- 15) Bayesian mode: predefined configuration (ultima) instead of manual settings -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="ultima"/>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNNNNNNNNNNNNNNNNNNNNNNNNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 16) Bayesian mode: quality calibration set to :hifi -->
        <test expect_num_outputs="1">
            <param name="input" value="ex1.bam" ftype="bam"/>
            <conditional name="consensus_options">
                <param name="mode" value="bayesian"/>
                <section name="settings">
                    <conditional name="config_cond">
                        <param name="config_param" value="manual"/>
                        <conditional name="calibration_cond">
                            <param name="calibration_param" value=":hifi"/>
                        </conditional>
                    </conditional>
                </section>
            </conditional>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="TTTGNTNNANCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTT"/>
                </assert_contents>
            </output>
        </test>
        <!-- Global Settings -->
        <!-- 17) minimum depth set to 2 -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <section name="global_settings">
                <param name="minimum_depth" value="2"/>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NGGTTTTATAAAANAANTAAGTCTACAGAGCAACTN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 18) restrict the consensus to a region (LR743429.1, positions 1700 to 1900) -->
        <test expect_num_outputs="1">
            <param name="input" value="example.bam" ftype="bam"/>
            <section name="global_settings">
                <repeat name="reg_repeat">
                    <param name="reg" value="LR743429.1"/>
                    <param name="start" value="1700"/>
                    <param name="end" value="1900"/>
                </repeat>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="CCACCAAACAATCTGTTGTGGCTCTAGGGTCGCAGGAAGGTGCATTGCACCAAGCTCTGGCCGGAGCGAT"/>
                    <has_size size="216" delta="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- 19) reference FASTA supplied from the history -->
        <test expect_num_outputs="1">
            <param name="input" value="consen1c.sam" ftype="sam"/>
            <section name="global_settings">
                <conditional name="addref_cond">
                    <param name="addref_select" value="history"/>
                    <param name="ref" value="consen1c.fa"/>
                </conditional>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNNNNNNNNNTTAGGGNNNNNNNNNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- Output Options -->
        <!-- 20) show deletions as "*" in the consensus -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <section name="output_options">
                <param name="show_deletions" value="true"/>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_text text="NNAGATAAAGAGGATAGCNN******NNNNN**NCAGCNNNNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 21) show insertions switched off -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <section name="output_options">
                <param name="show_insertions" value="false"/>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression="NNAGATAAGATAGCNNNNNNNNCAGCNNNNN"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 22) "all" option enabled, FASTA output wrapped at 70 characters per line -->
        <test expect_num_outputs="1">
            <param name="input" value="example.bam" ftype="bam"/>
            <param name="format" value="fasta"/>
            <section name="output_options">
                <param name="line_len" value="70"/>
                <param name="all" value="true"/>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_line_matching expression=">LR743429.1"/>
                    <has_n_lines n="157"/>
                </assert_contents>
            </output>
        </test>
        <!-- 23) ambiguity codes enabled (output is expected to contain Y and W) -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <section name="output_options">
                <param name="ambig" value="true"/>
            </section>
            <output name="output_file" ftype="fasta">
                <assert_contents>
                    <has_text text="Y"/>
                    <has_text text="W"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <!-- 24) mark insertions, FASTQ output -->
        <test expect_num_outputs="1">
            <param name="input" value="toy.sam" ftype="sam"/>
            <param name="format" value="fastq"/>
            <section name="output_options">
                <param name="mark_insertions" value="true"/>
            </section>
            <output name="output_file" ftype="fastq">
                <assert_contents>
                    <has_text text="!!BB.BBB_$_5_$_55BBB$5!!!!!!!!5555!!!!!"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Generate a consensus sequence from a SAM, BAM, or CRAM file based on the alignment records.

**Mode**

Available modes are “simple” (frequency counting) and “bayesian” (Gap5-based) methods.
The default is bayesian.

For compatibility with Samtools v1.16, the “bayesian_116” mode reproduces the legacy consensus behavior, which does not distinguish between substitution and indel errors.

**Bayesian configuration presets**

- **manual** – Manually set all parameters.
- **hiseq** – Uses Illumina HiSeq calibration: ``--qual-calibration :hiseq``.
- **hifi** – PacBio HiFi preset:
  ``--qual-calibration :hifi --homopoly-fix 0.3 --low-MQ 5 --scale-MQ 1.5 --het-scale 0.37``
- **r10.4_sup** – Oxford Nanopore R10.4 super accuracy preset:
  ``--qual-calibration :r10.4_sup --homopoly-fix 0.3 --low-MQ 5 --scale-MQ 1.5 --het-scale 0.37``
- **r10.4_dup** – Oxford Nanopore R10.4 duplex preset:
  ``--qual-calibration :r10.4_dup --homopoly-fix 0.3 --low-MQ 5 --scale-MQ 1.5 --het-scale 0.37``
- **ultima** – Ultima Genomics preset:
  ``--qual-calibration :ultima --homopoly-fix 0.3 --low-MQ 10 --scale-MQ 2 --het-scale 0.37``
    ]]></help>
    <expand macro="citations"/>
    <creator>
        <organization name="Galaxy Europe"/>
        <person givenName="Ahmad" familyName="Mahagna" url="https://github.com/Smkingsize"/>
        <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12"/>
    </creator>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/consen1c.fa Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,8 @@ +>c1 +GAAAAAAAAG +>c2 +GTCCAAGGTAACCTG +>c3 +ATTCAAAAAAAACTTAGGGTCCCCCCCCTGA +>c4 +TCCCCCCCCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/consen1c.sam Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,18 @@ +@SQ SN:c1 LN:10 +@SQ SN:c2 LN:15 +@SQ SN:c3 LN:31 +@SQ SN:c4 LN:10 +@CO c2 CC***AAGGTAA***CC +@CO +@CO +s1a CC***AAGG*AA***CC +@CO +s1b CC***AAGG*AA***CC +@CO +s2a CCTTTAAGG*AATTTCC +@CO +s2b CCTTTAACG*AATTTCC +@CO +s3 CCTTTAA***AATTTCC +s1a 0 c2 3 0 6M1D4M * 0 0 CCAAGGAACC IIII&IIIII +s1b 0 c2 3 0 6M1D4M * 0 0 CCAAGGAACC IIII&IIIII +s2a 0 c2 3 0 2M3I4M1D2M3I2M * 0 0 CCTTTAAGGAATTTCC IIIIIII&IIIIIIII +s2b 0 c2 3 0 2M3I4M1D2M3I2M * 0 0 CCTTTAACGAATTTCC IIIIIIIIIIIIIIII +s3 0 c2 3 0 2M3I2M3D2M3I2M * 0 0 CCTTTAAAATTTCC IIIIII&&IIIIII +s4 0 c3 4 0 10M * 0 0 GAAAAAAAAG AAAAAZZZZZ +s5 0 c3 20 0 10M * 0 0 TCCCCACCCT AAAAAZZZZZ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ex1.fa Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,56 @@ +>seq1 +CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT +GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC +GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC +AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC +AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT +CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC +AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC +AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC +ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC +CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT +TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT +TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT +ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA +ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG +TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA +CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG +TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC +TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG +TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG +AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC +TCCCTCGTCTTCTTA +>seq2 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG +CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT +TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT 
+CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG +GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT +CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT +TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA +AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA +ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA +AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC +TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA +GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT +AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA +AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT +AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT +AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT +ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT +GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG +CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA +GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA +AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA +TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC +CAGAAAAAAATATTTACAGTAACT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.sam Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,7 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:10 +read1 0 chr1 1 255 10M * 0 0 AACGTACGTA IIIIIIIIII +read2 0 chr1 1 255 10M * 0 0 CACGTACGTA IIIIIIIIII +read3 0 chr1 1 255 10M * 0 0 CACGTACGTA IIIIIIIIII +read4 0 chr1 1 255 10M * 0 0 AACGTACGTA IIIIIIIIII +read5 0 chr1 1 255 10M * 0 0 AACGTACGTA IIIIIIIIII \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/toy.sam Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,14 @@ +@SQ SN:ref LN:45 +@SQ SN:ref2 LN:40 +r001 163 ref 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 +r002 0 ref 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * +r003 0 ref 9 30 5H6M * 0 0 AGCTAA * +r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * +r003 16 ref 29 30 6H5M * 0 0 TAGGC * +r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * +x1 0 ref2 1 20 20M * 0 0 aggttttataaaacaaataa ???????????????????? +x2 0 ref2 2 30 21M * 0 0 ggttttataaaacaaataatt ????????????????????? +x3 0 ref2 6 30 9M4I13M * 0 0 ttataaaacAAATaattaagtctaca ?????????????????????????? +x4 0 ref2 10 30 25M * 0 0 CaaaTaattaagtctacagagcaac ????????????????????????? +x5 0 ref2 12 30 24M * 0 0 aaTaattaagtctacagagcaact ???????????????????????? +x6 0 ref2 14 30 23M * 0 0 Taattaagtctacagagcaacta ???????????????????????
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools-indexed sequence data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had the hg19 Canonical index stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Nov 17 07:30:30 2025 +0000 @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="utf-8"?> +<tables> <!-- Declares the fasta_indexes data table: columns value, dbkey, name and path, with rows read from tool-data/fasta_indexes.loc ("#" is the comment character). --> + <table comment_char="#" name="fasta_indexes"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>
