view tools/count_roi_variants/count_roi_variants.xml @ 2:167765a633c0 draft default tip

"Update all the pico_galaxy tools on main Tool Shed"
author peterjc
date Fri, 16 Apr 2021 22:34:00 +0000
parents 3ee6f4d0ac80
children
line wrap: on
line source

<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.6">
    <description>using samtools view</description>
    <!-- External dependency: samtools 1.2. The tool description and help text state that the wrapper runs "samtools view". -->
    <requirements>
        <requirement type="package" version="1.2">samtools</requirement>
    </requirements>
    <!--
        Asks the wrapper script to report its own version.
        The script path is single-quoted so that a tool directory containing spaces
        or shell metacharacters is still passed to python as one argument.
    -->
    <version_command><![CDATA[
python '$__tool_directory__/count_roi_variants.py' --version
    ]]></version_command>
    <!--
        Runs the wrapper script with four positional arguments, in order:
        the input BAM file, its BAM index (taken from the dataset metadata),
        the tabular output path, and the requested region.
        Every substituted value, including the script path, is single-quoted so that
        spaces or shell metacharacters cannot split or alter the command line.
    -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/count_roi_variants.py' '$input_bam' '${input_bam.metadata.bam_index}' '$out_tabular' '$region'
    ]]></command>
    <inputs>
        <!-- BAM dataset to query; the command line also passes its bam_index metadata file to the script. -->
        <param name="input_bam" type="data" format="bam" label="Input BAM file" />
        <!-- Free-text region; it is single-quoted on the command line and restricted by the sanitizer below. -->
        <param name="region" type="text" label="Region of interest" help="Use the reference:start-end syntax as in samtools.">
            <sanitizer>
                <!--
                    SAM/BAM spec says the name must match regex [!-)+-<>-~][!-~]*
                    but will focus on ASCII 33 ("!") to ASCII 126 ("~"), i.e.
                    !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
                    of which some are going to be potential trouble like !, ", $, &, ', \, `, {, |, }
                    which is how I came to this hopefully safe set.

                    Note that the pipe character is allowed below even though it is listed
                    above as potentially troublesome: NCBI-style identifiers such as
                    gi|187250362|ref|NC_010642.1| (used in the tests) need it, and the
                    region value is single-quoted in the command line.
                -->
                <valid initial="string.letters,string.digits">
                    <add value="#%+,-./:;=@^_|~" />
                    <!-- Explicitly exclude the space character. -->
                    <remove value=" " />
                </valid>
                <!-- NOTE(review): initial="none" presumably means no replacement mapping is applied to rejected characters; confirm against the Galaxy sanitizer documentation. -->
                <mapping initial="none" />
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <!-- Single tabular output; its label combines the input dataset name and the requested region. -->
        <data name="out_tabular" format="tabular" label="$input_bam.name $region variants" />
    </outputs>
    <tests>
        <!-- Region chr2:400-406 of ex1.bam: checks the tabular output and the summary line on stdout. -->
        <test>
            <param name="input_bam" value="ex1.bam" ftype="bam" />
            <param name="region" value="chr2:400-406" />
            <output name="out_tabular" file="ex1.count_roi_variants.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="Counted 2 variants from 40 reads spanning chr2:400-406" />
            </assert_stdout>
        </test>
        <!-- Region ref:10-15 of coverage_test.bam: a single read spanning the region. -->
        <test>
            <param name="input_bam" value="coverage_test.bam" ftype="bam" />
            <param name="region" value="ref:10-15" />
            <output name="out_tabular" file="coverage_test.count_roi_variants.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="Counted 1 variants from 1 reads spanning ref:10-15" />
            </assert_stdout>
        </test>
        <!-- This test is a tricky one due to the NCBI's love of pipe characters in identifiers -->
        <test>
            <param name="input_bam" value="SRR639755_mito_pairs_vs_NC_010642_clc.bam" ftype="bam" />
            <param name="region" value="gi|187250362|ref|NC_010642.1|:1695-1725" />
            <output name="out_tabular" file="SRR639755_mito_pairs_vs_NC_010642_clc.count-1695-1725.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="Counted 3 variants from 16 reads spanning gi|187250362|ref|NC_010642.1|:1695-1725" />
            </assert_stdout>
        </test>
        <!-- Failure case: same BAM file as the previous test but with a different reference name in the region; expects exit code 1 and the error line on stderr. -->
        <test expect_failure="true" expect_exit_code="1">
            <param name="input_bam" value="SRR639755_mito_pairs_vs_NC_010642_clc.bam" ftype="bam" />
            <param name="region" value="ref:1695-1725" />
            <assert_stderr>
                <has_line line="ERROR: samtools did not recognise the region requested, can't count any variants." />
            </assert_stderr>
        </test>
    </tests>
    <help>
**What it does**

This tool runs the command ``samtools view`` from the SAMtools toolkit, getting
all the reads in your BAM file mapped to the given region of interest (ROI).
It then counts all the different sequence variants in reads spanning that ROI,
which are returned as a tab-separated table.

Reads mapped to the ROI but not spanning it completely are ignored.

The input is a sorted and indexed BAM file; the output is tabular. The first column
is the observed sequence variant within the ROI, the second column is the number
of reads with that sequence, and the third column gives this as a percentage of
the reads spanning the ROI.

====== =================================================================================
Column Description
------ ---------------------------------------------------------------------------------
     1 Sequence variant from ROI
     2 Number of reads with that sequence variant
     3 Percentage of reads with that sequence variant (to 2 decimal places)
====== =================================================================================


**Citation**

If you use this Galaxy tool in work leading to a scientific publication please
cite:

Heng Li et al (2009). The Sequence Alignment/Map format and SAMtools.
Bioinformatics 25(16), 2078-9.
https://doi.org/10.1093/bioinformatics/btp352

Peter J.A. Cock (2016), Count sequence variants in region of interest in BAM file.
https://toolshed.g2.bx.psu.edu/view/peterjc/count_roi_variants

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at https://toolshed.g2.bx.psu.edu/view/peterjc/count_roi_variants
    </help>
    <!-- Machine-readable citation: the DOI of the SAMtools paper cited in the help text. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
    </citations>
</tool>