Mercurial > repos > jjohnson > gmap
diff gmap_build.xml @ 7:561503a442f0
refactor
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 08 Nov 2011 13:26:41 -0600 |
parents | |
children | a89fec682254 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gmap_build.xml Tue Nov 08 13:26:41 2011 -0600 @@ -0,0 +1,177 @@ +<tool id="gmap_build" name="GMAP Build" version="2.0.0"> + <description>a database genome index for GMAP and GSNAP</description> + <requirements> + <requirement type="binary">gmap_build</requirement> + <!-- proposed tag for added datatype dependencies --> + <requirement type="datatype">gmapdb</requirement> + <requirement type="datatype">gmap_snps</requirement> + </requirements> + <version_string>gmap --version</version_string> + <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command> + <inputs> + <!-- Name for this gmapdb --> + <param name="refname" type="text" label="Name you want to give this gmap database" help=""> + <validator type="empty_field" message="A database name is required."/> + </param> + <!-- Input data --> + <repeat name="inputs" title="Reference Sequence" min="1"> + <param name="input" type="data" format="fasta" label="reference sequence fasta" /> + </repeat> + + <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help=""> + <option value="12">12</option> + <option value="13">13</option> + <option value="14">14</option> + <option value="15" selected="true">15</option> + </param> + <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/> + <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/> + <conditional name="splicesite"> + <param name="splice_source" type="select" label="Add splice and intron info from" > + <option value="none"></option> + <option value="refGeneTable">refGenes table from UCSC table browser</option> + <option value="gtf">GTF</option> + <option value="gff3">GFF3</option> + </param> + <when value="none"/> + <when value="refGeneTable"> + <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" /> + <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)" + help="Note that alignment tracks in UCSC sometimes have an extra column on the left."> + <validator type="in_range" message="The number of colmumns to skip must >= 0." min="0."/> + </param> + + </when> + <when value="gtf"> + <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" /> + </when> + <when value="gff3"> + <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" /> + </when> + </conditional> + <conditional name="dbsnp"> + <param name="snp_source" type="select" label="Add SNP info from" > + <option value="none"></option> + <option value="snpTable">UCSC SNP Table</option> + <option value="snpFile">GMAP SNP File</option> + </param> + <when value="none"/> + <when value="snpTable"> + <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" /> + <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" /> + <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help=""> + <option value="1" selected="true">1 (High)</option> + <option value="2">2 (Medium)</option> + <option value="3">3 (All)</option> + </param> + </when> + <when value="snpFile"> + <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file" + help="Format (3 columns): + <br>>rs62211261 21:14379270 CG + <br>>rs62211262 21:14379281 CG + <br>Each line must start with a > character, then be followed by an + identifier (which may have duplicates). Then there should be the + chromosomal coordinate of the SNP. (Coordinates are all 1-based, so + the first character of a chromosome is number 1.) Finally, there + should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN) + <br>These alleles must correspond to the possible nucleotides on the plus strand of the genome. + If the one of these two letters does not match the allele in the reference + sequence, that SNP will be ignored in subsequent processing as a probable error. + The N stands for any other allele." /> + </when> + </conditional> + </inputs> + <outputs> + <!-- + <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/> + --> + <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" /> + </outputs> + <configfiles> + <configfile name="shscript"> +#!/bin/bash +#set $ds = chr(36) +#set $gt = chr(62) +#set $lt = chr(60) +#set $ad = chr(38) +## #set $ref_files = '' +## #for $i in $inputs: + ## #set $ref_files = $ref_files $i.input +## #end for +## echo $ref_files +#import os.path +#set $gmapdb = $output.extra_files_path +#set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps') +mkdir -p $gmapdb +## export GMAPDB required for cmetindex and atoiindex +export GMAPDB=$gmapdb +#for $k in $kmer.__str__.split(','): +gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for# +#end for +get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /' +echo "kmers: " $kmer +#if $splicesite.splice_source == 'refGeneTable': +#if $splicesite.refGenes.__str__ != 'None': +cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites') +cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns') +#end if +#elif $splicesite.splice_source == 'gtf': +#if $splicesite.gtfGenes.__str__ != 'None': +cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites') +cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns') +#end if +#elif $splicesite.splice_source == 'gff3': +#if $splicesite.gff3Genes.__str__ != 'None': +cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites') +cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns') +#end if +#end if +#if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None': +#if $dbsnp.snp_source == 'snpTable': +#if $dbsnp.snpsex.__str__ != 'None': +cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps') +#else: +cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps') +#end if +#else: +cat $dbsnp.snps | iit_store -o $os.path.join($mapsdir,'snps') +#end if +snpindex -d $refname -v snps +echo "snpindex" -d $refname -v snps +#end if +#if $cmetindex.__str__ == 'yes': +cmetindex -d $refname +echo "cmetindex" -d $refname +#end if +#if $atoiindex.__str__ == 'yes': +atoiindex -d $refname +echo "atoiindex" -d $refname +#end if +get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /' + </configfile> + </configfiles> + + <tests> + </tests> + + <help> + + +**GMAP Build** + +GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMSP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.) + +You will want to read the README_ + +Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310 + +.. _GMAP: http://research-pub.gene.com/gmap/ +.. _GSNAP: http://research-pub.gene.com/gmap/ +.. _README: http://research-pub.gene.com/gmap/src/README +.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859 + + + </help> +</tool> +