diff gmap_build.xml @ 11:6adc485b6dc0 draft default tip

Uploaded
author jjohnson
date Tue, 31 Jul 2012 08:19:46 -0400
parents 93911bac43da
children
line wrap: on
line diff
--- a/gmap_build.xml	Thu Jan 05 14:31:24 2012 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,174 +0,0 @@
-<tool id="gmap_build" name="GMAP Build" version="2.0.0">
-  <description>a database genome index for GMAP and GSNAP</description>
-  <requirements>
-      <requirement type="binary">gmap_build</requirement>
-  </requirements>
-  <version_string>gmap --version</version_string>
-  <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>
-  <inputs>
-    <!-- Name for this gmapdb -->
-    <param name="refname" type="text" label="Name you want to give this gmap database" help="">
-      <validator type="empty_field" message="A database name is required."/>
-    </param>
-    <!-- Input data -->
-    <repeat name="inputs" title="Reference Sequence" min="1">
-      <param name="input" type="data" format="fasta" label="reference sequence fasta" />
-    </repeat>
-
-    <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">
-      <option value="12">12</option>
-      <option value="13">13</option>
-      <option value="14">14</option>
-      <option value="15" selected="true">15</option>
-    </param>  
-    <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
-    <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
-    <conditional name="splicesite">
-      <param name="splice_source" type="select" label="Add splice and intron info from" >
-        <option value="none"></option>
-        <option value="refGeneTable">refGenes table from UCSC table browser</option>
-        <option value="gtf">GTF</option>
-        <option value="gff3">GFF3</option>
-      </param>
-      <when value="none"/>
-      <when value="refGeneTable">
-        <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
-        <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)" 
-               help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
-          <validator type="in_range" message="The number of colmumns to skip must >= 0." min="0."/>
-        </param>
- 
-      </when>
-      <when value="gtf">
-        <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
-      </when>
-      <when value="gff3">
-        <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
-      </when>
-    </conditional> 
-    <conditional name="dbsnp">
-      <param name="snp_source" type="select" label="Add SNP info from" >
-        <option value="none"></option>
-        <option value="snpTable">UCSC SNP Table</option>
-        <option value="snpFile">GMAP SNP File</option>
-      </param>
-      <when value="none"/>
-      <when value="snpTable">
-        <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
-        <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
-        <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
-          <option value="1" selected="true">1 (High)</option>
-          <option value="2">2 (Medium)</option>
-          <option value="3">3 (All)</option>
-        </param>
-      </when>
-      <when value="snpFile">
-        <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file" 
-           help="Format (3 columns):
-                &lt;br&gt;>rs62211261 21:14379270 CG
-                &lt;br&gt;>rs62211262 21:14379281 CG
-                &lt;br&gt;Each line must start with a &gt; character, then be followed by an
-                identifier (which may have duplicates).  Then there should be the
-                chromosomal coordinate of the SNP.  (Coordinates are all 1-based, so
-                the first character of a chromosome is number 1.)  Finally, there
-                should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
-                &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.  
-                If the one of these two letters does not match the allele in the reference
-                sequence, that SNP will be ignored in subsequent processing as a probable error.
-                The N stands for any other allele." />
-      </when>
-    </conditional> 
-  </inputs>
-  <outputs>
-    <!--
-    <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
-    -->
-    <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
-  </outputs>
-  <configfiles>
-    <configfile name="shscript">
-#!/bin/bash
-#set $ds = chr(36)
-#set $gt = chr(62)
-#set $lt = chr(60)
-#set $ad = chr(38)
-## #set $ref_files = ''
-## #for $i in $inputs:
-  ## #set $ref_files = $ref_files $i.input
-## #end for
-## echo $ref_files
-#import os.path
-#set $gmapdb = $output.extra_files_path
-#set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
-mkdir -p $gmapdb
-## export GMAPDB required for cmetindex  and atoiindex
-export GMAPDB=$gmapdb
-#for $k in $kmer.__str__.split(','):
-gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for#
-#end for
-get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /' 
-echo "kmers: " $kmer 
-#if $splicesite.splice_source == 'refGeneTable':
-#if $splicesite.refGenes.__str__ != 'None':
-cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o  $os.path.join($mapsdir,'splicesites')
-cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o  $os.path.join($mapsdir,'introns')
-#end if
-#elif $splicesite.splice_source == 'gtf':
-#if $splicesite.gtfGenes.__str__ != 'None':
-cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o  $os.path.join($mapsdir,'splicesites')
-cat $splicesite.gtfGenes | gtf_introns | iit_store -o  $os.path.join($mapsdir,'introns')
-#end if
-#elif $splicesite.splice_source == 'gff3':
-#if $splicesite.gff3Genes.__str__ != 'None':
-cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o  $os.path.join($mapsdir,'splicesites')
-cat $splicesite.gff3Genes | gff3_introns | iit_store -o  $os.path.join($mapsdir,'introns')
-#end if
-#end if
-#if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
-#if $dbsnp.snp_source == 'snpTable':
-#if $dbsnp.snpsex.__str__ != 'None':
-cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o  $os.path.join($mapsdir,'snps')
-#else:
-cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o  $os.path.join($mapsdir,'snps')
-#end if
-#else:
-cat $dbsnp.snps | iit_store -o  $os.path.join($mapsdir,'snps')
-#end if
-snpindex -d $refname -v snps
-echo "snpindex" -d  $refname -v snps
-#end if
-#if $cmetindex.__str__ == 'yes':
-cmetindex -d $refname
-echo "cmetindex" -d $refname
-#end if
-#if $atoiindex.__str__ == 'yes':
-atoiindex -d $refname
-echo "atoiindex" -d $refname
-#end if
-get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /' 
-    </configfile>
-  </configfiles>
-
-  <tests>
-  </tests> 
-
-  <help>
-
-
-**GMAP Build**
-
-GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).  (GMAP Build uses GMSP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)
-
-You will want to read the README_
-
-Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
-
-.. _GMAP: http://research-pub.gene.com/gmap/
-.. _GSNAP: http://research-pub.gene.com/gmap/
-.. _README: http://research-pub.gene.com/gmap/src/README
-.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
-
-
-  </help>
-</tool>
-