<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper that builds a GMAP/GSNAP genome index (a "gmapdb")
  from one or more FASTA reference sequences. Optionally adds splice site,
  intron and SNP maps, and the cmetindex / atoiindex auxiliary indexes.
  The work is done by the bash script rendered from the "shscript"
  configfile below.
-->
<tool id="gmap_build" name="GMAP Build" version="2.0.0">
  <description>a database genome index for GMAP and GSNAP</description>
  <requirements>
    <requirement type="binary">gmap_build</requirement>
    <!-- proposed tag for added datatype dependencies -->
    <requirement type="datatype">gmapdb</requirement>
    <requirement type="datatype">gmap_snps</requirement>
  </requirements>
  <version_string>gmap --version</version_string>
  <!--
    Run the generated script. Both stdout and stderr are captured in the
    output dataset so that diagnostics from the GMAP utilities are kept in
    the log instead of being written to a stray file in the working directory.
  -->
  <command interpreter="command"> /bin/bash $shscript > $output 2>&amp;1 </command>
  <inputs>
    <!-- Name for this gmapdb -->
    <param name="refname" type="text" label="Name you want to give this gmap database" help="">
      <validator type="empty_field" message="A database name is required."/>
    </param>
    <!-- Input data: at least one FASTA reference is required -->
    <repeat name="inputs" title="Reference Sequence" min="1">
      <param name="input" type="data" format="fasta" label="reference sequence fasta" />
    </repeat>

    <!-- One gmap_build run is performed per selected kmer size -->
    <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">
      <option value="12">12</option>
      <option value="13">13</option>
      <option value="14">14</option>
      <option value="15" selected="true">15</option>
    </param>
    <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>
    <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>
    <!-- Optional gene annotation used to derive splice site and intron maps -->
    <conditional name="splicesite">
      <param name="splice_source" type="select" label="Add splice and intron info from" >
        <option value="none"></option>
        <option value="refGeneTable">refGenes table from UCSC table browser</option>
        <option value="gtf">GTF</option>
        <option value="gff3">GFF3</option>
      </param>
      <when value="none"/>
      <when value="refGeneTable">
        <param name="refGenes" type="data" format="tabular" optional="true" label="UCSC refGenes table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/refGene.txt.gz" />
        <param name="col_skip" type="integer" value="1" label="Columns to skip before the id/name column (default 1)"
               help="Note that alignment tracks in UCSC sometimes have an extra column on the left.">
          <validator type="in_range" message="The number of columns to skip must be &gt;= 0." min="0"/>
        </param>

      </when>
      <when value="gtf">
        <param name="gtfGenes" type="data" format="gtf" optional="true" label="Genes as GTF" help="" />
      </when>
      <when value="gff3">
        <param name="gff3Genes" type="data" format="gff3" optional="true" label="Genes in GFF3 format" help="" />
      </when>
    </conditional>
    <!-- Optional SNP information stored as the "snps" map and indexed with snpindex -->
    <conditional name="dbsnp">
      <param name="snp_source" type="select" label="Add SNP info from" >
        <option value="none"></option>
        <option value="snpTable">UCSC SNP Table</option>
        <option value="snpFile">GMAP SNP File</option>
      </param>
      <when value="none"/>
      <when value="snpTable">
        <param name="snps" type="data" format="tabular" optional="true" label="UCSC SNPs table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz" />
        <param name="snpsex" type="data" format="tabular" optional="true" label="UCSC SNP Exceptions table" help="Example: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz" />
        <param name="weight" type="select" label="Include SNPs with at least Confidence Level" help="">
          <option value="1" selected="true">1 (High)</option>
          <option value="2">2 (Medium)</option>
          <option value="3">3 (All)</option>
        </param>
      </when>
      <when value="snpFile">
        <!-- Markup inside the help attribute must be entity-escaped to keep the XML well-formed -->
        <param name="snps" type="data" format="gmap_snps" optional="true" label="GMAP SNPs file"
               help="Format (3 columns):
               &lt;br&gt;&gt;rs62211261 21:14379270 CG
               &lt;br&gt;&gt;rs62211262 21:14379281 CG
               &lt;br&gt;Each line must start with a &gt; character, then be followed by an
               identifier (which may have duplicates). Then there should be the
               chromosomal coordinate of the SNP. (Coordinates are all 1-based, so
               the first character of a chromosome is number 1.) Finally, there
               should be the two possible alleles: ( AC AG AT CG CT GT or AN CN GN TN)
               &lt;br&gt;These alleles must correspond to the possible nucleotides on the plus strand of the genome.
               If one of these two letters does not match the allele in the reference
               sequence, that SNP will be ignored in subsequent processing as a probable error.
               The N stands for any other allele." />
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!--
    <data format="txt" name="log" label="${tool.name} on ${on_string}: log"/>
    -->
    <!-- The primary file holds the build log; the index itself lives in the dataset's extra files directory -->
    <data format="gmapdb" name="output" label="${tool.name} on ${on_string} gmapdb ${refname}" />
  </outputs>
  <configfiles>
    <configfile name="shscript">
#!/bin/bash
#import os.path
## The index is written beneath the extra files directory of the output dataset.
#set $gmapdb = $output.extra_files_path
## Map files (splicesites, introns, snps) are stored in GMAPDB/refname/refname.maps
#set $mapsdir = $os.path.join($gmapdb, str($refname), str($refname) + '.maps')
## The database name is free text supplied by the user, so it and every path
## derived from it are double-quoted to survive spaces and glob characters.
mkdir -p "$gmapdb"
## export GMAPDB required for cmetindex and atoiindex
export GMAPDB="$gmapdb"
## Build the genome index once for each selected kmer size.
#for $k in $kmer.__str__.split(','):
gmap_build -D "$gmapdb" -d "$refname" -s numeric-alpha -k $k #for i in $inputs# ${i.input}#end for#
#end for
get-genome -D "$gmapdb" -d '?' | sed 's/^Available .*/gmap db: /'
echo "kmers: " $kmer
## Splice site and intron maps, derived from whichever annotation source was chosen.
#if $splicesite.splice_source == 'refGeneTable':
#if $splicesite.refGenes.__str__ != 'None':
cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o "$os.path.join($mapsdir,'splicesites')"
cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o "$os.path.join($mapsdir,'introns')"
#end if
#elif $splicesite.splice_source == 'gtf':
#if $splicesite.gtfGenes.__str__ != 'None':
cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o "$os.path.join($mapsdir,'splicesites')"
cat $splicesite.gtfGenes | gtf_introns | iit_store -o "$os.path.join($mapsdir,'introns')"
#end if
#elif $splicesite.splice_source == 'gff3':
#if $splicesite.gff3Genes.__str__ != 'None':
cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o "$os.path.join($mapsdir,'splicesites')"
cat $splicesite.gff3Genes | gff3_introns | iit_store -o "$os.path.join($mapsdir,'introns')"
#end if
#end if
## SNP map: a UCSC table is converted with dbsnp_iit, a GMAP SNP file is stored as is.
#if $dbsnp.snp_source != 'none' and $dbsnp.snps.__str__ != 'None':
#if $dbsnp.snp_source == 'snpTable':
#if $dbsnp.snpsex.__str__ != 'None':
cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o "$os.path.join($mapsdir,'snps')"
#else:
cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o "$os.path.join($mapsdir,'snps')"
#end if
#else:
cat $dbsnp.snps | iit_store -o "$os.path.join($mapsdir,'snps')"
#end if
snpindex -d "$refname" -v snps
echo "snpindex" -d "$refname" -v snps
#end if
#if $cmetindex.__str__ == 'yes':
cmetindex -d "$refname"
echo "cmetindex" -d "$refname"
#end if
#if $atoiindex.__str__ == 'yes':
atoiindex -d "$refname"
echo "atoiindex" -d "$refname"
#end if
## Report the maps that ended up in the database.
get-genome -D "$gmapdb" -d "$refname" -m '?' | sed 's/^Available maps .*/maps: /'
    </configfile>
  </configfiles>

  <tests>
  </tests>

  <help>


**GMAP Build**

GMAP Build creates an index of a genomic sequence for mapping and alignment using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program). (GMAP Build uses GMAP commands: gmap_build, iit_store, psl_splicesites, psl_introns, gtf_splicesites, gtf_introns, gff3_splicesites, gff3_introns, dbsnp_iit, snpindex, cmetindex, and atoiindex.)

You will want to read the README_

Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310

.. _GMAP: http://research-pub.gene.com/gmap/
.. _GSNAP: http://research-pub.gene.com/gmap/
.. _README: http://research-pub.gene.com/gmap/src/README
.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859


  </help>
</tool>