view gmap/snpindex.xml @ 2:52da588232b0

Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author Jim Johnson <jj@umn.edu>
date Fri, 21 Oct 2011 11:38:55 -0500
parents
children
line wrap: on
line source

<tool id="gmap_snpindex" name="GMAP SNP Index" version="2.0.0">
  <description>build index files for known SNPs</description>
  <!-- External programs and datatypes this tool relies on. -->
  <requirements>
      <!-- Executables invoked by the generated shell script (see the shscript configfile). -->
      <requirement type="binary">snpindex</requirement>
      <requirement type="binary">iit_store</requirement>
      <requirement type="binary">dbsnp_iit</requirement>
      <!-- proposed tag for added datatype dependencies -->
      <requirement type="datatype">gmapsnpindex</requirement>
      <requirement type="datatype">gmapdb</requirement>
      <requirement type="datatype">gmap_snps</requirement>
      <requirement type="datatype">snps.iit</requirement>
  </requirements>
  <!-- Reports the version of the installed snpindex binary. -->
  <version_string>snpindex --version</version_string>
  <!--
    Runs the generated shell script (the "shscript" configfile) with bash.
    Standard error is duplicated onto the job's standard output, and the script's
    own standard output is then written to the output dataset.
    The ampersand must be written as an XML entity; without it the shell would
    redirect standard error into a file literally named "1".
  -->
  <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command>
  <inputs>
    <!-- Reference genome: either a built-in index listed in gmap_indices.loc or a gmapdb dataset from the history. -->
    <conditional name="refGenomeSource">
      <param name="genomeSource" type="select" label="Will you map to a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
        <option value="indexed">Use a built-in index</option>
        <option value="gmapdb">Use gmapdb from the history</option>
      </param>
      <when value="indexed">
        <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
          <options from_file="gmap_indices.loc">
            <column name="uid" index="0" />
            <column name="dbkey" index="1" />
            <column name="name" index="2" />
            <column name="kmers" index="3" />
            <column name="maps" index="4" />
            <column name="snps" index="5" />
            <column name="value" index="6" />
          </options>
        </param>
      </when>
      <when value="gmapdb">
        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 
              help="A GMAP database built with GMAP Build"/>
      </when>
    </conditional>
    <!-- Source of the known SNPs: a UCSC table, a GMAP SNP text file, or an existing IIT map. -->
    <conditional name="dbsnp">
      <param name="snp_source" type="select" label="Add SNP info from" >
        <option value="snpTable">UCSC SNP Table</option>
        <option value="snpFile">GMAP SNP File</option>
        <option value="snpIIT">GMAP SNPs map from GMAP iit store</option>
      </param>
      <when value="snpTable">
        <param name="snps" type="data" format="tabular" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
        <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
        <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
          <option value="1" selected="true">1 (High)</option>
          <option value="2">2 (Medium)</option>
          <option value="3">3 (All)</option>
        </param>
      </when>
      <when value="snpFile">
        <param name="snps" type="data" format="gmap_snps" label="GMAP SNPs file" 
           help="Format (3 columns):
                &lt;br&gt;>rs62211261 21:14379270 CG
                &lt;br&gt;>rs62211262 21:14379281 CG
                &lt;br&gt;Each line must start with a &gt; character, then be followed by an
                identifier (which may have duplicates).  Then there should be the
                chromosomal coordinate of the SNP.  (Coordinates are all 1-based, so
                the first character of a chromosome is number 1.)  Finally, there
                should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
                &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.  
                If the one of these two letters does not match the allele in the reference
                sequence, that SNP will be ignored in subsequent processing as a probable error.
                The N stands for any other allele." />
      </when>
      <when value="snpIIT">
        <param name="snpIIT" type="data" format="snps.iit" label="GMAP SNPs map" help="Created by: GMAP iit store" />
      </when>
    </conditional>
    <!-- Base name of the SNP index; the shell script uses it for the .iit file name and passes it to snpindex -v. -->
    <param name="snps_name" type="text" value="snps" label="Name for this SNP index" help="no white space characters">
    </param>
  </inputs>
  <outputs>
    <!--
      A separate log dataset is currently disabled:
      <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
    -->
    <!-- The SNP index dataset; the shell script writes the index files into its extra_files_path. -->
    <data name="output"
          format="gmapsnpindex"
          label="${tool.name} on ${on_string} snpindex"/>
  </outputs>
  <configfiles>
    <!-- Cheetah template that is rendered to the bash script executed by the command line. -->
    <configfile name="shscript">
#!/bin/bash
## Lines starting with a Cheetah directive are evaluated when the template is rendered;
## every other line is written to the generated bash script.
##
## Character constants for dollar, greater-than, less-than and ampersand
## (not referenced further down in this template).
#set $ds = chr(36)
#set $gt = chr(62)
#set $lt = chr(60)
#set $ad = chr(38)
#import os.path
## Resolve the GMAP database directory (-D) and database name (-d).
#if $refGenomeSource.genomeSource == "gmapdb":
#set $gmapdb = $refGenomeSource.gmapdb.extra_files_path
#set $refname = $refGenomeSource.gmapdb.metadata.db_name
#else:
#set $gmapdb = $os.path.dirname($refGenomeSource.gmapindex.value)
## This must be a set directive; as plain text it would be emitted into the script
## and refname would be undefined for built-in indexes.
#set $refname = $os.path.basename($refGenomeSource.gmapindex.value)
#end if
## All index files are written below the output dataset's extra files directory.
#set $gmapsnpdir = $output.extra_files_path
mkdir -p $gmapsnpdir
#set $snpsname = $snps_name.__str__
#set $snpsiit = '.'.join([$snpsname,'iit'])
#set $pathsnps = $os.path.join($gmapsnpdir,$snpsname)
#set $pathsnpsiit = $os.path.join($gmapsnpdir,$snpsiit)
## Pick the dataset that belongs to the selected SNP source. The IIT branch of the
## dbsnp conditional names its parameter snpIIT, the other two branches name it snps.
#set $snpinput = None
#if $dbsnp.snp_source == 'snpIIT':
#set $snpinput = $dbsnp.snpIIT
#elif $dbsnp.snp_source == 'snpTable' or $dbsnp.snp_source == 'snpFile':
#set $snpinput = $dbsnp.snps
#end if
#if str($snpinput) != 'None':
## Step 1: produce the SNP map in IIT format at pathsnpsiit.
#if $dbsnp.snp_source == 'snpTable':
#if $dbsnp.snpsex.__str__ != 'None':
cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $pathsnps
#else:
cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $pathsnps
#end if
#elif $dbsnp.snp_source == 'snpFile':
cat $dbsnp.snps | iit_store -o $pathsnps
#elif $dbsnp.snp_source == 'snpIIT':
cat $dbsnp.snpIIT > $pathsnpsiit
#end if
## Step 2: build the SNP index from the IIT map, then echo the command that was run.
snpindex -D $gmapdb -d $refname -V $output.extra_files_path -v $snpsname $pathsnpsiit
echo snpindex -D  $gmapdb -d $refname -V $output.extra_files_path -v $snpsname $pathsnpsiit
#end if
    </configfile>
  </configfiles>

  <tests>
    <!-- No functional tests are defined for this tool. -->
  </tests> 

  <help>


**GMAP SNP Index**

GMAP SNP Index (snpindex in the GMAP documentation) creates an index for known SNPs allowing for SNP tolerant mapping and alignment when using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).  

You will want to read the README_

Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310

.. _GMAP: http://research-pub.gene.com/gmap/
.. _GSNAP: http://research-pub.gene.com/gmap/
.. _README: http://research-pub.gene.com/gmap/src/README
.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859


  </help>
</tool>