view iit_store.xml @ 8:a89fec682254

gmap/gsnap updated to version 2011-11-30
author Jim Johnson <jj@umn.edu>
date Thu, 08 Dec 2011 11:00:46 -0600
parents 561503a442f0
children
line wrap: on
line source

<tool id="gmap_iit_store" name="GMAP IIT" version="2.0.0">
  <!-- One-line summary of the tool. -->
  <description>Create a map store for known genes or SNPs</description>
  <!-- External executable this tool depends on. -->
  <requirements>
      <requirement type="binary">iit_store</requirement>
  </requirements>
  <!-- Command line used to query the iit_store version. -->
  <version_string>iit_store --version</version_string>
  <!-- Runs the script rendered from the "shscript" configfile with bash;
       the script's stderr is redirected into the "log" output dataset. -->
  <command interpreter="command"> /bin/bash $shscript 2> $log </command>
  <inputs>
    <!-- Input data -->
    <conditional name="map">
      <!-- Kind of map to build; every option value has a matching <when> branch
           in the enclosing "map" conditional. -->
      <param name="type"
             type="select"
             label="Make map for">
        <option value="genes">Introns and Splice sites</option>
        <option value="snps">SNPs</option>
        <option value="gmap">GMAP Annotation</option>
      </param>
      <when value="genes">
        <!-- Source of the gene models. The selected format decides which
             converter (psl_*, gtf_* or gff3_*) the generated script runs. -->
        <conditional name="src">
          <param name="src_format" type="select" label="Add splice and intron info from">
            <option value="refGeneTable">refGenes table from UCSC table browser</option>
            <option value="gtf">GTF</option>
            <option value="gff3">GFF3</option>
          </param>
          <when value="refGeneTable">
            <param name="genes" type="data" format="tabular" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
            <!-- Passed to psl_splicesites / psl_introns as the -s argument. -->
            <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
                   help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
              <validator type="in_range" message="The number of columns to skip must be >= 0." min="0"/>
            </param>
          </when>
          <when value="gtf">
            <param name="genes" type="data" format="gtf" label="Genes as GTF" help="" />
          </when>
          <when value="gff3">
            <param name="genes" type="data" format="gff3" label="Genes in GFF3 format" help="" />
          </when>
        </conditional>
        <!-- Which .iit maps to generate. The output filters test these values,
             so each checked box enables the matching output dataset. -->
        <param name="maps" type="select" display="checkboxes" multiple="true" force_select="true" label="Maps to create">
          <option value="splicesites" selected="true">splicesites.iit</option>
          <option value="introns" selected="false">introns.iit</option>
        </param>
      </when>
      <when value="snps">
        <!-- Source of the SNP data: either a UCSC dbSNP table (converted with
             dbsnp_iit by the generated script) or a file already in GMAP SNP format. -->
        <conditional name="src">
          <param name="src_format" type="select" label="Add SNP info from">
            <option value="snpTable">UCSC SNP Table</option>
            <option value="snpFile">GMAP SNP File</option>
          </param>
          <when value="snpTable">
            <param name="snps" type="data" format="tabular" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
            <!-- Optional: when supplied it is passed to dbsnp_iit with -e. -->
            <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
            <!-- Passed to dbsnp_iit as the -w argument. -->
            <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
              <option value="1" selected="true">1 (High)</option>
              <option value="2">2 (Medium)</option>
              <option value="3">3 (All)</option>
            </param>
          </when>
          <when value="snpFile">
            <!-- Required: the generated script always reads this dataset, so an
                 unset value could only lead to a failed run. -->
            <param name="snps" type="data" format="gmap_snps" label="GMAP SNPs file"
               help="Format (3 columns):&lt;B&gt;
                    &lt;br&gt;>rs62211261 21:14379270 CG
                    &lt;br&gt;>rs62211262 21:14379281 CG
                    &lt;/B&gt;
                    &lt;br&gt;Each line must start with a &gt; character, then be followed by an
                    identifier (which may have duplicates).  Then there should be the
                    chromosomal coordinate of the SNP.  (Coordinates are all 1-based, so
                    the first character of a chromosome is number 1.)  Finally, there
                    should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
                    &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.  
                    If the one of these two letters does not match the allele in the reference
                    sequence, that SNP will be ignored in subsequent processing as a probable error.
                    The N stands for any other allele." />
          </when>
        </conditional>
      </when>
      <when value="gmap">
        <!-- A map file already in GMAP annotation format; its header lines carry
             a label, coordinates and an optional tag (hence 2 or 3 columns). -->
        <param name="annotation" type="data" format="gmap_annotation" label="GMAP mapfile"
          help="Format (2 or 3 columns): &lt;B&gt;
                &lt;br&gt;>label coords optional_tag
                &lt;br&gt;optional_annotation (which may be zero, one, or multiple lines)
                &lt;/B&gt;
                &lt;br&gt;Each line must start with a &gt; character, then be followed by an identifier (which may have duplicates).  
                &lt;br&gt;Then there should be the chromosomal coordinate range.  (Coordinates are all 1-based, so the first character of a chromosome is number 1.)  
                &lt;br&gt;The coords should be of the form
                &lt;br&gt; chr:position
                &lt;br&gt;  chr:startposition..endposition
                &lt;br&gt;The term chr:position is equivalent to chr:position..position.  
                &lt;br&gt;If you want to indicate that the interval is on the minus strand or reverse direction, then endposition may be less than startposition.  
                " />
      </when>
    </conditional> 
  </inputs>
  <outputs>
    <!-- Always produced: receives the stderr of the generated script. -->
    <data name="log"
          format="txt"
          label="${tool.name} on ${on_string}: log"/>
    <!-- Gene maps only, when the "splicesites" box is checked. -->
    <data name="splicesites_iit"
          format="splicesites.iit"
          label="${tool.name} on ${on_string} splicesites.iit">
      <filter>(map['type'] == 'genes' and 'splicesites' in map['maps'])</filter>
    </data>
    <!-- Gene maps only, when the "introns" box is checked. -->
    <data name="introns_iit"
          format="introns.iit"
          label="${tool.name} on ${on_string} introns.iit">
      <filter>(map['type'] == 'genes' and 'introns' in map['maps'])</filter>
    </data>
    <!-- SNP maps. -->
    <data name="snps_iit"
          format="snps.iit"
          label="${tool.name} on ${on_string} snps.iit">
      <filter>(map['type'] == 'snps')</filter>
    </data>
    <!-- Maps built from a GMAP annotation file. -->
    <data name="map_iit"
          format="iit"
          label="${tool.name} on ${on_string} map.iit">
      <filter>(map['type'] == 'gmap')</filter>
    </data>
  </outputs>
  <configfiles>
    <!-- Cheetah template rendered into the bash script that the tool's command line runs. -->
    <configfile name="shscript">
#!/bin/bash
## Input datasets are streamed with plain cat.
#set $catcmd = 'cat'
## Shell metacharacters are produced with chr() rather than typed literally
## (presumably to avoid XML/Cheetah escaping; confirm before changing).
#set $ds = chr(36)
#set $gt = chr(62)
#set $lt = chr(60)
#set $ad = chr(38)
#set $ep = chr(33)
## toerr is the shell redirection that sends an echo to stderr.
#set $toerr = ''.join([$gt,$ad,'2'])
#import os.path
#if $map.type == 'genes':
## A multi-select renders as a comma-separated string. Split it so that each
## choice is tested on its own; otherwise checking both boxes matches neither.
 #set $selected_maps = str($map.maps).split(',')
if [ $ep -e $map.src.genes ]; then echo "$map.src.genes does not exist" $toerr; exit 1; fi
if [ $ep -s $map.src.genes ]; then echo "$map.src.genes is empty" $toerr; exit 2; fi
 #if $map.src.src_format == 'refGeneTable':
  #if 'splicesites' in $selected_maps:
   $catcmd $map.src.genes | psl_splicesites -s $map.src.col_skip | iit_store -o  $splicesites_iit
  #end if
  #if 'introns' in $selected_maps:
   $catcmd  $map.src.genes | psl_introns -s $map.src.col_skip | iit_store -o  $introns_iit
  #end if
 #elif $map.src.src_format == 'gtf':
  #if 'splicesites' in $selected_maps:
   $catcmd $map.src.genes | gtf_splicesites | iit_store -o  $splicesites_iit
  #end if
  #if 'introns' in $selected_maps:
   $catcmd $map.src.genes | gtf_introns | iit_store -o  $introns_iit
  #end if
 #elif $map.src.src_format == 'gff3':
  #if 'splicesites' in $selected_maps:
   $catcmd $map.src.genes | gff3_splicesites | iit_store -o  $splicesites_iit
  #end if
  #if 'introns' in $selected_maps:
   $catcmd $map.src.genes | gff3_introns | iit_store -o  $introns_iit
  #end if
 #end if
#elif $map.type == 'snps':
if [ $ep -s $map.src.snps ]; then echo "$map.src.snps is empty" $toerr; exit 2; fi
 #if $map.src.src_format == 'snpTable':
## Only the UCSC table branch defines the snpsex and weight parameters.
  #if $map.src.snpsex.__str__ != 'None':
   $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight -e $map.src.snpsex | iit_store -o  $snps_iit
  #else:
   $catcmd $map.src.snps | dbsnp_iit -w $map.src.weight | iit_store -o $snps_iit
  #end if
 #else:
## A GMAP SNP file needs no dbsnp_iit conversion and is fed to iit_store directly.
  $catcmd $map.src.snps | iit_store -o $snps_iit
 #end if
#else:
## GMAP annotation: the dataset is the "annotation" parameter of the gmap branch.
if [ $ep -s $map.annotation ]; then echo "$map.annotation is empty" $toerr; exit 2; fi
  $catcmd $map.annotation | iit_store -o $map_iit
#end if
    </configfile>
  </configfiles>

  <!-- No functional tests are defined for this tool. -->
  <tests>
  </tests> 

  <help>


**iit_store**

GMAP IIT creates an Interval Index Tree map of known splice sites, introns, or SNPs (it uses iit_store described in the GMAP documentation).  The maps can be used in GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).  Maps are typically used for known splice sites, introns, or SNPs.  

You will want to read the README_

Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310

.. _GMAP: http://research-pub.gene.com/gmap/
.. _GSNAP: http://research-pub.gene.com/gmap/
.. _README: http://research-pub.gene.com/gmap/src/README
.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859


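**inputs**

Choose the kind of map to build:

- *Introns and Splice sites*: a UCSC refGenes table, a GTF file, or a GFF3 file of known genes.
- *SNPs*: a UCSC SNP table (optionally with its exceptions table), or a GMAP SNP file.
- *GMAP Annotation*: a map file in GMAP annotation format.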

  </help>
</tool>