<!--
view gmap_build.xml @ 10:93911bac43da

Modifications for ToolShed proprietary data types
author Jim Johnson <jj@umn.edu>
date Thu, 05 Jan 2012 14:31:24 -0600
parents a89fec682254
children
line wrap: on
line source
-->

<tool id="gmap_build" name="GMAP Build" version="2.0.0">
  <!-- Short description of what the tool produces -->
  <description>a database genome index for GMAP and GSNAP</description>
  <!-- Executable this wrapper declares as a requirement -->
  <requirements>
    <requirement type="binary">gmap_build</requirement>
  </requirements>
  <!-- Command whose output is recorded as the tool version -->
  <version_string>gmap --version</version_string>
  <!-- Run the generated shell script (see the "shscript" configfile).
       The script's stdout is written to the output dataset; its stderr is
       duplicated onto this command's own stdout (the ampersand in the
       redirection must be XML-escaped).  The earlier form "2>1" wrote stderr
       to a file literally named "1" in the working directory. -->
  <command interpreter="command"> /bin/bash $shscript 2>&amp;1 1> $output </command>
  <inputs>
    <!-- Database name: required free text, used as the gmap database name by the script -->
    <param name="refname"
           type="text"
           label="Name you want to give this gmap database"
           help="">
      <validator type="empty_field" message="A database name is required."/>
    </param>
    <!-- Reference sequences: one or more FASTA datasets -->
    <repeat name="inputs" title="Reference Sequence" min="1">
      <param name="input"
             type="data"
             format="fasta"
             label="reference sequence fasta" />
    </repeat>

    <!-- k-mer sizes: one gmap_build run is generated per selected value (15 preselected) -->
    <param name="kmer"
           type="select"
           multiple="true"
           force_select="true"
           label="kmer size"
           help="">
      <option value="12">12</option>
      <option value="13">13</option>
      <option value="14">14</option>
      <option value="15" selected="true">15</option>
    </param>
    <!-- Optional extra indexes; both are enabled by default -->
    <param name="cmetindex"
           type="boolean"
           checked="true"
           truevalue="yes"
           falsevalue="no"
           label="Create cmetindex to process reads from bisulfite-treated DNA"/>
    <param name="atoiindex"
           type="boolean"
           checked="true"
           truevalue="yes"
           falsevalue="no"
           label="Create atoiindex to process reads under RNA-editing tolerance"/>
    <!-- Optional splice site / intron annotation.  The selected source decides
         which dataset parameter is shown; every dataset is optional, and the
         script skips the step when none is supplied. -->
    <conditional name="splicesite">
      <param name="splice_source" type="select" label="Add splice and intron info from" >
        <option value="none"></option>
        <option value="refGeneTable">refGenes table from UCSC table browser</option>
        <option value="gtf">GTF</option>
        <option value="gff3">GFF3</option>
      </param>
      <when value="none"/>
      <when value="refGeneTable">
        <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
        <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)" 
               help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
          <validator type="in_range" message="The number of columns to skip must be >= 0." min="0"/>
        </param>
      </when>
      <when value="gtf">
        <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
      </when>
      <when value="gff3">
        <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
      </when>
    </conditional>
    <!-- Optional SNP annotation.  Both branches name their dataset "snps" so the
         script can refer to $dbsnp.snps regardless of the selected source. -->
    <conditional name="dbsnp">
      <param name="snp_source" type="select" label="Add SNP info from" >
        <option value="none"></option>
        <option value="snpTable">UCSC SNP Table</option>
        <option value="snpFile">GMAP SNP File</option>
      </param>
      <when value="none"/>
      <when value="snpTable">
        <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
        <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
        <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
          <option value="1" selected="true">1 (High)</option>
          <option value="2">2 (Medium)</option>
          <option value="3">3 (All)</option>
        </param>
      </when>
      <when value="snpFile">
        <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file" 
           help="Format (3 columns):
                &lt;br&gt;>rs62211261 21:14379270 CG
                &lt;br&gt;>rs62211262 21:14379281 CG
                &lt;br&gt;Each line must start with a &gt; character, then be followed by an
                identifier (which may have duplicates).  Then there should be the
                chromosomal coordinate of the SNP.  (Coordinates are all 1-based, so
                the first character of a chromosome is number 1.)  Finally, there
                should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
                &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.  
                If one of these two letters does not match the allele in the reference
                sequence, that SNP will be ignored in subsequent processing as a probable error.
                The N stands for any other allele." />
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- Separate log dataset, currently disabled:
    <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
    -->
    <data name="output"
          format="gmapdb"
          label="${tool.name} on ${on_string} gmapdb ${refname}" />
  </outputs>
  <configfiles>
    <configfile name="shscript">
#!/bin/bash
## Cheetah template that is rendered into the bash script executed by the
## tool command.  Steps: build the genome index once per selected k-mer size,
## optionally store splice site / intron / SNP maps, optionally build the
## cmet and atoi indexes, then list the resulting database and its maps.
##
## The database name is user-supplied text, so every shell word built from it
## (and from filesystem paths) is double-quoted to keep names that contain
## spaces or glob characters as a single argument.
#import os.path
#set $gmapdb = $output.extra_files_path
#set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
#set $splicesites_iit = $os.path.join($mapsdir,'splicesites')
#set $introns_iit = $os.path.join($mapsdir,'introns')
#set $snps_iit = $os.path.join($mapsdir,'snps')
mkdir -p "$gmapdb"
## export GMAPDB required for cmetindex  and atoiindex
export GMAPDB="$gmapdb"
## One gmap_build invocation per selected k-mer size, each given every reference fasta
#for $k in $kmer.__str__.split(','):
gmap_build -D "$gmapdb" -d "$refname" -s numeric-alpha -k $k #for i in $inputs# "${i.input}"#end for#
#end for
get-genome -D "$gmapdb" -d '?' | sed 's/^Available .*/gmap db: /' 
echo "kmers: " $kmer 
## Splice site and intron maps; skipped when no annotation dataset was supplied
#if $splicesite.splice_source == 'refGeneTable':
#if $splicesite.refGenes.__str__ != 'None':
cat "$splicesite.refGenes" | psl_splicesites -s $splicesite.col_skip | iit_store -o "$splicesites_iit"
cat "$splicesite.refGenes" | psl_introns -s $splicesite.col_skip | iit_store -o "$introns_iit"
#end if
#elif $splicesite.splice_source == 'gtf':
#if $splicesite.gtfGenes.__str__ != 'None':
cat "$splicesite.gtfGenes" | gtf_splicesites | iit_store -o "$splicesites_iit"
cat "$splicesite.gtfGenes" | gtf_introns | iit_store -o "$introns_iit"
#end if
#elif $splicesite.splice_source == 'gff3':
#if $splicesite.gff3Genes.__str__ != 'None':
cat "$splicesite.gff3Genes" | gff3_splicesites | iit_store -o "$splicesites_iit"
cat "$splicesite.gff3Genes" | gff3_introns | iit_store -o "$introns_iit"
#end if
#end if
## SNP map: a UCSC table is converted with dbsnp_iit (optionally with an
## exceptions table); a GMAP SNP file is stored as-is.  snpindex then runs.
#if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
#if $dbsnp.snp_source == 'snpTable':
#if $dbsnp.snpsex.__str__ != 'None':
cat "$dbsnp.snps" | dbsnp_iit -w $dbsnp.weight -e "$dbsnp.snpsex" | iit_store -o "$snps_iit"
#else:
cat "$dbsnp.snps" | dbsnp_iit -w $dbsnp.weight | iit_store -o "$snps_iit"
#end if
#else:
cat "$dbsnp.snps" | iit_store -o "$snps_iit"
#end if
snpindex -d "$refname" -v snps
echo "snpindex" -d "$refname" -v snps
#end if
#if $cmetindex.__str__ == 'yes':
cmetindex -d "$refname"
echo "cmetindex" -d "$refname"
#end if
#if $atoiindex.__str__ == 'yes':
atoiindex -d "$refname"
echo "atoiindex" -d "$refname"
#end if
get-genome -D "$gmapdb" -d "$refname" -m '?' | sed 's/^Available maps .*/maps: /' 
    </configfile>
  </configfiles>

  <!-- No functional tests are defined for this tool. -->
  <tests>
  </tests> 

  <help>


**GMAP Build**

GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).  (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)

You will want to read the README_

Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310

.. _GMAP: http://research-pub.gene.com/gmap/
.. _GSNAP: http://research-pub.gene.com/gmap/
.. _README: http://research-pub.gene.com/gmap/src/README
.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859


  </help>
</tool>