view tabix.xml @ 4:148a92a4d0ed draft

Uploaded
author nilesh
date Wed, 10 Jul 2013 16:56:51 -0400
parents 5ff4702264f7
children
line wrap: on
line source

<tool id="tabix" name="tabix" version="0.0.1">
    <description>Generic indexer for TAB-delimited genome position files.</description>
    <!-- External dependency: the "tabix" package, pinned at version 0.2.6. -->
    <requirements>
        <requirement version="0.2.6" type="package">tabix</requirement>
    </requirements>
    <!--
        Cheetah template that assembles the tabix command line.

        * Every conditional parameter is read through $optional, which is the
          name of the conditional declared in the inputs section.
        * extension == "tabular": the column layout is described explicitly.
          Each flag is emitted only when its parameter is non-empty:
            -0  coordinates are zero-based (position == "no")
            -s  column of the sequence name
            -b  column of the start position
            -e  column of the end position
            -S  number of leading lines to skip
            -c  skip lines starting with this character (quoted for the shell)
        * any other extension (gff, bed, sam, vcf): handed to tabix as a
          preset through -p, which requires the preset name as its argument.
        * $region may be empty; when it holds several regions they must stay
          separate shell words, so it is intentionally left unquoted.
    -->
    <command>
        tabix
    #if str($optional.extension) == "tabular"
        #if str($optional.position) == "no"
            -0
        #end if

        #if str($optional.columnseq) != ""
            -s $optional.columnseq
        #end if

        #if str($optional.columnstart) != ""
            -b $optional.columnstart
        #end if

        #if str($optional.columnend) != ""
            -e $optional.columnend
        #end if

        #if str($optional.skiplines) != ""
            -S $optional.skiplines
        #end if

        #if str($optional.skipchar) != ""
            -c '$optional.skipchar'
        #end if
    #else
        -p $optional.extension
    #end if

    $input $region

    </command>
    <!--
        Tool parameters.

        * input    : the dataset handed to tabix.
        * region   : optional free text; one or more regions separated by spaces.
        * optional : conditional keyed on the "extension" select. The "tabular"
                     branch exposes the custom column layout settings; the
                     gff, bed, sam and vcf branches carry no extra parameters,
                     but each select value still gets its own (empty) when
                     element so that every choice has a matching branch.

        Defaults are marked with selected="true" on the option; they are the
        first option in each select ("tabular" and "yes").
    -->
    <inputs>
        <param name="input" type="data" format="gff,bed,sam,vcf,tabular" label="Input file" />
        <param name="region" type="text" optional="true" label="Regions (separate with spaces)" />
        <conditional name="optional">
            <param name="extension" type="select" label="Input Extension">
                <option value="tabular" selected="true">tabular</option>
                <option value="gff">gff</option>
                <option value="bed">bed</option>
                <option value="sam">sam</option>
                <option value="vcf">vcf</option>
            </param>
            <when value="tabular">
                <param name="columnseq" type="integer" optional="true" label="Column of sequence name" />
                <param name="columnstart" type="integer" optional="true" label="Column of start chromosomal position" />
                <param name="columnend" type="integer" optional="true" label="Column of end chromosomal position" />
                <param name="skiplines" type="integer" optional="true" label="Skip first INT lines" />
                <param name="skipchar" type="text" optional="true" label="Skip lines started with CHAR" />
                <param name="position" type="select" label="1-based? (if not, 0-based)">
                    <option value="yes" selected="true">yes</option>
                    <option value="no">no</option>
                </param>
            </when>
            <when value="gff" />
            <when value="bed" />
            <when value="sam" />
            <when value="vcf" />
        </conditional>
    </inputs>

    <!--
        Declares one output dataset, typed as tabular and picked up from the
        job working directory through from_work_dir.

        NOTE(review): the from_work_dir value is built from ${input.file_name}.
        Presumably that expands (if it is expanded here at all) to the input
        dataset's full path rather than a file name inside the working
        directory. Confirm that the .tbi index is actually collected.
    -->
    <outputs>
        <data name="output"
              format="tabular"
              from_work_dir="${input.file_name}.tbi" />
    </outputs>

    <help>
**What it does:** 

Tabix indexes a TAB-delimited genome position file (in.tab.bgz) and, when no region is given on the command line, creates an index file (in.tab.bgz.tbi). The input data file must be position-sorted and compressed by bgzip, which has a gzip(1)-like interface. After indexing, tabix can quickly retrieve data lines overlapping regions specified in the format "chr:beginPos-endPos". Fast data retrieval also works over a network if a URI is given as the file name; in this case the index file is downloaded if it is not present locally.

**Citation:**

Tabix was written by Heng Li. The BGZF library was originally implemented by Bob Handsaker and modified by Heng Li for remote file access and in-memory caching.

http://samtools.sourceforge.net/tabix.shtml

**Example:**

(grep ^"#" in.gff; grep -v ^"#" in.gff | sort -k1,1 -k4,4n) | bgzip > sorted.gff.gz;

tabix -p gff sorted.gff.gz;

tabix sorted.gff.gz chr1:10,000,000-20,000,000;

    </help>
</tool>