Mercurial > repos > devteam > samtools_calmd
changeset 2:e65c2cd0964c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_calmd commit 766da8a5f0449de99c2854aacaefb80d11ad083c
author | iuc |
---|---|
date | Fri, 30 Nov 2018 17:46:15 -0500 |
parents | 33208952b99d |
children | 3d873744df7e |
files | macros.xml samtools_calmd.xml test-data/samtools_calmd_out_2.bam |
diffstat | 3 files changed, 168 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue May 09 11:18:11 2017 -0400 +++ b/macros.xml Fri Nov 30 17:46:15 2018 -0500 @@ -1,11 +1,131 @@ <macros> <xml name="requirements"> <requirements> - <requirement type="package" version="1.3.1">samtools</requirement> + <requirement type="package" version="@TOOL_VERSION@">samtools</requirement> <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">1.3.1</token> + <token name="@TOOL_VERSION@">1.9</token> + <token name="@FLAGS@">#set $flags = sum(map(int, str($filter).split(',')))</token> + <token name="@PREPARE_IDX@"><![CDATA[ + ##prepare input and indices + ln -s '$input' infile && + #if $input.is_of_type('bam'): + #if str( $input.metadata.bam_index ) != "None": + ln -s '${input.metadata.bam_index}' infile.bai && + #else: + samtools index infile infile.bai && + #end if + #elif $input.is_of_type('cram'): + #if str( $input.metadata.cram_index ) != "None": + ln -s '${input.metadata.cram_index}' infile.crai && + #else: + samtools index infile infile.crai && + #end if + #end if + ]]></token> + <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[ + ##prepare input and indices + #for $i, $bam in enumerate( $input_bams ): + ln -s '$bam' '${i}' && + #if $bam.is_of_type('bam'): + #if str( $bam.metadata.bam_index ) != "None": + ln -s '${bam.metadata.bam_index}' '${i}.bai' && + #else: + samtools index '${i}' '${i}.bai' && + #end if + #elif $bam.is_of_type('cram'): + #if str( $bam.metadata.cram_index ) != "None": + ln -s '${bam.metadata.cram_index}' '${i}.crai' && + #else: + samtools index '${i}' '${i}.crai' && + #end if + #end if + #end for + ]]></token> + <token name="@PREPARE_FASTA_IDX@"><![CDATA[ + ##checks for reference data ($addref_cond.addref_select=="history" or =="cached") + ##and sets the -t/-T parameters accordingly: + ##- in case of history a symbolic link is used because samtools (view) will generate + ## the index which might not be possible in the directory containing the fasta file + ##- in case of cached the absolute path is used which 
allows reading the cram file + ## without specifying the reference + #if $addref_cond.addref_select == "history": + ln -s '${addref_cond.ref}' reference.fa && + samtools faidx reference.fa && + #set reffa="reference.fa" + #set reffai="reference.fa.fai" + #elif $addref_cond.addref_select == "cached": + #set reffa=str($addref_cond.ref.fields.path) + #set reffai=str($addref_cond.ref.fields.path)+".fai" + #else + #set reffa=None + #set reffai=None + #end if + ]]></token> + <token name="@ADDTHREADS@"><![CDATA[ + ##compute the number of ADDITIONAL threads to be used by samtools (-@) + addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) && + ]]></token> + <token name="@ADDMEMORY@"><![CDATA[ + ##compute the amount of memory available to samtools sort (-m) + ##use only 75% of available: https://github.com/samtools/samtools/issues/831 + addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && + ((addmemory=addmemory*75/100)) && + ]]></token> + <xml name="seed_input"> + <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." 
/> + </xml> + <xml name="flag_options"> + <option value="1">read is paired</option> + <option value="2">read is mapped in a proper pair</option> + <option value="4">read is unmapped</option> + <option value="8">mate is unmapped</option> + <option value="16">read reverse strand</option> + <option value="32">mate reverse strand</option> + <option value="64">read is the first in a pair</option> + <option value="128">read is the second in a pair</option> + <option value="256">alignment or read is not primary</option> + <option value="512">read fails platform/vendor quality checks</option> + <option value="1024">read is a PCR or optical duplicate</option> + <option value="2048">supplementary alignment</option> + </xml> + + <!-- region specification macros and tokens for tools that allow the specification + of region by bed file / space separated list of regions --> + <token name="@REGIONS_FILE@"><![CDATA[ + #if $cond_region.select_region == 'tab': + -t '$cond_region.targetregions' + #end if + ]]></token> + <token name="@REGIONS_MANUAL@"><![CDATA[ + #if $cond_region.select_region == 'text': + #for $i, $x in enumerate($cond_region.regions_repeat): + '${x.region}' + #end for + #end if + ]]></token> + <xml name="regions_macro"> + <conditional name="cond_region"> + <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)"> + <option value="no" selected="True">No</option> + <option value="text">Manualy specify regions</option> + <option value="tab">Regions from tabular file</option> + </param> + <when value="no"/> + <when value="text"> + <repeat name="regions_repeat" min="1" default="1" title="Regions"> + <param name="region" type="text" label="region" help="format chr:from-to"> + <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any 
integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator> + </param> + </repeat> + </when> + <when value="tab"> + <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" /> + </when> + </conditional> + </xml> + <xml name="citations"> <citations> <citation type="bibtex"> @@ -49,21 +169,4 @@ <exit_code range="1:" level="fatal" description="Error" /> </stdio> </xml> - <token name="@no-chrom-options@"> ------ - -.. class:: warningmark - -**No options available? How to re-detect metadata** - -If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: - -1. Click on the **pencil** icon adjacent to the dataset in the history -2. A new menu will appear in the center pane of the interface -3. Click **Datatype** tab -4. Set **New Type** to **BAM** -5. Click **Save** - -The medatada will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. - </token> </macros>
--- a/samtools_calmd.xml Tue May 09 11:18:11 2017 -0400 +++ b/samtools_calmd.xml Fri Nov 30 17:46:15 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="samtools_calmd" name="CalMD" version="2.0.1"> +<tool id="samtools_calmd" name="CalMD" version="2.0.2"> <description>recalculate MD/NM tags</description> <macros> <import>macros.xml</import> @@ -16,8 +16,9 @@ #end if samtools calmd + $baq_settings.use_baq $baq_settings.modify_quality $baq_settings.extended_baq #if str($option_set.option_sets) == 'advanced': - $option_set.change_identical $option_set.modify_quality $option_set.compute_cap $option_set.extended_baq + $option_set.change_identical -C $option_set.adjust_mq #end if -b '$input_bam' @@ -43,17 +44,36 @@ <param name="ref_fasta" type="data" format="fasta" label="Using reference file" /> </when> </conditional> + <conditional name="baq_settings"> + <param name="use_baq" argument="-r" type="select" + label="Do you also want BAQ (Base Alignment Quality) scores to be calculated?"> + <option value="">No</option> + <option value="-r">Yes, run BAQ calculation</option> + </param> + <when value=""> + <param name="modify_quality" type="hidden" value="" /> + <param name="extended_baq" type="hidden" value="" /> + </when> + <when value="-r"> + <param name="modify_quality" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="false" + label="Use BAQ to cap read base qualities" + help="By default, BAQ scores are stored in a separate BQ tag, and the read base quality string is left untouched. When you enable this option, the BAQ scores will be used directly to modify the quality string instead." 
/> + <param name="extended_baq" argument="-E" type="boolean" truevalue="-E" falsevalue="" checked="false" label="Extended BAQ for better sensitivity" /> + </when> + </conditional> <conditional name="option_set"> - <param name="option_sets" type="select" label="Options"> + <param name="option_sets" type="select" label="Additional options"> <option value="default">Use defaults</option> <option value="advanced">Advanced options</option> </param> <when value="default" /> <when value="advanced"> - <param name="change_identical" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="False" label="Change identical bases to '='" /> - <param name="modify_quality" argument="-A" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Modify the quality string" /> - <param name="compute_cap" argument="-r" type="boolean" truevalue="-r" falsevalue="" checked="False" label="Compute BQ or cap baseQ by BAQ" /> - <param name="extended_baq" argument="-E" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ for better sensitivity" /> + <param name="change_identical" argument="-e" type="boolean" truevalue="-e" falsevalue="" checked="False" + label="Change identical bases to '='" + help="Replace bases in read sequences that match the reference base at that position with an equal sign" /> + <param name="adjust_mq" argument="-C" type="integer" min="0" max="255" value="0" + label="Coefficient to cap mapping quality of poorly mapped reads" + help="Higher values for this setting mean a stronger downgrade of the mapping quality of reads with excessive mismatches (50: recommended setting for reads aligned with BWA, 0: do not downgrade mapping qualities)" /> </when> </conditional> </inputs> @@ -62,6 +82,7 @@ </outputs> <tests> <test> + <param name="use_baq" value="" /> <param name="option_sets" value="default" /> <param name="input_bam" value="phiX.bam"/> <param name="reference_source_selector" value="history" /> @@ -69,9 +90,11 @@ <output 
name="calmd_output" file="samtools_calmd_out_1.bam" ftype="bam" /> </test> <test> + <param name="use_baq" value="-r" /> + <param name="extended_baq" value="true" /> <param name="option_sets" value="advanced" /> <param name="change_identical" value="true" /> - <param name="extended_baq" value="true" /> + <param name="adjust_mq" value="50" /> <param name="input_bam" value="phiX.bam"/> <param name="reference_source_selector" value="history" /> <param name="ref_fasta" value="phiX.fasta" /> @@ -81,16 +104,27 @@ <help><![CDATA[ **What it does** -Generates the MD tag using the ``samtools calmd`` command. If the MD tag (see SAM format reference below for explanation of SAM/BAM tags) is already present, this command will give a warning if the MD tag generated is different from the existing tag. Outputs a BAM file. +Generates the MD tag using the ``samtools calmd`` command. If the MD tag (see +SAM format reference below for explanation of SAM/BAM tags) is already present, +this command will give a warning if the MD tag generated is different from the +existing tag. + +Optionally, also generates the BQ tag to encode base alignment qualities, +caps the mapping quality of poorly mapping reads, and modifies read sequences +replacing bases matching the reference with ``=``. + +Outputs a BAM file. ----- -**NM and MD tags** +**SAM/BAM tags written by this tool** -From the SAM format specification:: +From the SAM format tag specification:: MD (string) String for mismatching positions. Regex : [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)* - NM (indeger) Edit distance to the reference, including ambiguous bases but excluding clipping + NM (integer) Edit distance to the reference, including ambiguous bases but excluding clipping + BQ (string) String of offsets to base alignment quality (BAQ), of the same length as the read sequence. + At the i-th read base, BAQ_i = Q_i − (BQ_i − 64) where Q_i is the i-th base quality. See references for more information about SAM format tags. ]]></help>