# HG changeset patch
# User jjohnson
# Date 1318956162 14400
# Node ID d58d272914e7e7da99c097334899ffd6b60b4ab6
Uploaded
diff -r 000000000000 -r d58d272914e7 gmap/README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/README	Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,50 @@
+
+GMAP and GSNAP use added datatypes:
+
+   add datatype definition file: lib/galaxy/datatypes/gmap.py
+
+   add the following import line to:  lib/galaxy/datatypes/registry.py
+   import gmap # added for gmap tools
+
+   add to datatypes_conf.xml
+        
+        
+        
+    
+    
+      
+    
+    
+      
+      
+      
+    
+    
+    
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+    
+      
+      
+      
+
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+      
+        
+        
+        
+          
+          
+          
+        
+        
+        
+        
+        
+      
+     
+
+    
+    
+    
+      
+        
+        
+      
+      
+      
+       
+       	
+       	
+       	
+       	
+       	
+       	
+         
+         
+         
+         
+         
+       
+       	
+       
+       
+       	
+         
+         
+         
+       
+       	
+         
+         
+         
+       
+       	
+          
+       
+       	
+         
+         
+         
+         
+       
+       
+      
+    
+    
+      
+        
+        
+      
+      
+      
+       
+       
+        
+        
+        
+       
+       	
+       	
+       	
+       	
+       
+       
+        
+        
+        
+       
+      
+    
+    
+  
+  
+    
+    
+      (split_output == False)
+      
+        
+        
+        
+        
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+        
+        
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+        
+        
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+        
+        
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+        
+        
+        
+      
+    
+  
+  
+   
+
+  
+
+**What it does**
+
+GMAP_ (Genomic Mapping and Alignment Program) is a mapping and alignment program for mRNA and EST sequences.  The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains.  It is developed by Thomas D. Wu of Genentech, Inc.  
+
+Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
+
+.. _GMAP: http://research-pub.gene.com/gmap/
+.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+You will want to read the README_
+
+.. _README: http://research-pub.gene.com/gmap/src/README
+
+  
+
+
diff -r 000000000000 -r d58d272914e7 gmap/gmap_build.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/gmap_build.xml	Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,163 @@
+
+  a GMAP DB Index
+  
+      gmap_build
+  
+  gmap --version
+   /bin/bash $shscript 2>1 1> $output 
+  
+    
+    
+    
+      
+    
+    
+      
+      
+      
+      
+      
+    
+    
+    
+      
+        
+        
+        
+        
+      
+      
+      
+        
+        
+          
+        
+ 
+      
+      
+        
+      
+      
+        
+      
+     
+    
+      
+        
+        
+        
+      
+      
+      
+        
+        
+        
+          
+          
+          
+        
+      
+      
+        
+      
+     
+  
+  
+    
+    
+    
+  
+  
+    
+#!/bin/bash
+## Cheetah template for the bash script that builds a GMAP database inside the
+## output dataset's extra_files_path and optionally adds maps and indexes to it.
+## chr(36), chr(62), chr(60) and chr(38) are the dollar sign, greater-than,
+## less-than and ampersand characters; none of these four variables is
+## referenced later in this script.
+#set $ds = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+#import os.path
+#set $gmapdb = $output.extra_files_path
+## Maps directory: gmapdb/refname/refname.maps
+#set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
+mkdir -p $gmapdb
+## export GMAPDB required for cmetindex  and atoiindex
+export GMAPDB=$gmapdb
+## Run gmap_build once for each comma-separated value in the kmer parameter.
+#for $k in $kmer.__str__.split(','):
+gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k $input
+#end for
+## Report the databases found; sed rewrites the "Available ..." line to "gmap db: ".
+get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /' 
+echo "kmers: " $kmer 
+## Optional splicesites and introns maps, built from a refGene table, a GTF file
+## or a GFF3 file. Each branch is skipped when the string form of its file
+## parameter is 'None'.
+#if $splicesite.splice_source == 'refGeneTable':
+#if $splicesite.refGenes.__str__ != 'None':
+cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o  $os.path.join($mapsdir,'splicesites')
+cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o  $os.path.join($mapsdir,'introns')
+#end if
+#elif $splicesite.splice_source == 'gtf':
+#if $splicesite.gtfGenes.__str__ != 'None':
+cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o  $os.path.join($mapsdir,'splicesites')
+cat $splicesite.gtfGenes | gtf_introns | iit_store -o  $os.path.join($mapsdir,'introns')
+#end if
+#elif $splicesite.splice_source == 'gff3':
+#if $splicesite.gff3Genes.__str__ != 'None':
+cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o  $os.path.join($mapsdir,'splicesites')
+cat $splicesite.gff3Genes | gff3_introns | iit_store -o  $os.path.join($mapsdir,'introns')
+#end if
+#end if
+## Optional snps map from a SNP table; the -e option is passed to dbsnp_iit only
+## when the snpsex parameter is set. snpindex is then run on the stored map.
+#if $dbsnp.snp_source == 'snpTable':
+#if $dbsnp.snps.__str__ != 'None':
+#if $dbsnp.snpsex.__str__ != 'None':
+cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o  $os.path.join($mapsdir,'snps')
+#else:
+cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o  $os.path.join($mapsdir,'snps')
+#end if
+snpindex -d $refname -v snps
+#end if
+#end if
+## cmetindex and atoiindex run only when the matching parameter is 'yes'; both
+## are called without -D and so rely on the GMAPDB variable exported above.
+#if $cmetindex.__str__ == 'yes':
+cmetindex -d $refname
+echo "cmetindex" 
+#end if
+#if $atoiindex.__str__ == 'yes':
+atoiindex -d $refname
+echo "atoiindex" 
+#end if
+## Report the maps found; sed rewrites the "Available maps ..." line to "maps: ".
+get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /' 
+    
+  
+
+  
+   
+
+  
+
+
+**GMAP Build**
+
+GMAP Build creates an index of a genomic sequence for alignments using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).  
+
+You will want to read the README_
+
+Publication_ citation: Thomas D. Wu, Colin K. Watanabe  Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
+
+.. _GMAP: http://research-pub.gene.com/gmap/
+.. _GSNAP: http://research-pub.gene.com/gmap/
+.. _README: http://research-pub.gene.com/gmap/src/README
+.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
+
+
+  
+
+
diff -r 000000000000 -r d58d272914e7 gmap/gsnap.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/gsnap.xml	Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,585 @@
+
+  Genomic Short-read Nucleotide Alignment Program
+  
+      gsnap
+  
+  gsnap --version
+  
+    #import os.path, re
+    ## Cheetah template that assembles the gsnap command line from the tool parameters.
+    ## Most options are emitted only when the string form of the parameter is not empty.
+    gsnap
+    --nthreads="4" --ordered
+    ## Reference genome: a history file passed as --gseg, a GmapDB dataset from the
+    ## history, or otherwise the path held in gmapindex.value.
+    #if $refGenomeSource.genomeSource == "history":
+      --gseg=$refGenomeSource.ownFile
+    #elif $refGenomeSource.genomeSource == "gmapdb":
+      ## The db name is taken from the first entry of the dataset's extra_files_path.
+      #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
+      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
+      ## kmer, splicemap and snpindex are passed only when the string form is exactly two characters long.
+      #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
+        --kmer=$refGenomeSource.kmer
+      #end if
+      #if $refGenomeSource.splicemap != None and len($refGenomeSource.splicemap.__str__) == 2:
+        --use-splices=$refGenomeSource.splicemap
+      #end if
+      #if $refGenomeSource.snpindex != None and len($refGenomeSource.snpindex.__str__) == 2:
+        --use-snps=$refGenomeSource.snpindex
+      #end if
+    #else:
+      --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
+      #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
+        --kmer=$refGenomeSource.kmer
+      #end if
+    #end if
+    #if $mode.__str__ != '':
+      --mode=$mode
+    #end if
+    ## Computation options, emitted only in "advanced" mode.
+    #if $computation.options == "advanced":
+      #if $computation.max_mismatches.__str__ != '':
+        --max-mismatches=$computation.max_mismatches
+      #end if
+      $computation.query_unk_mismatch
+      $computation.genome_unk_mismatch
+      #if $computation.terminal_threshold.__str__ != '':
+        --terminal-threshold=$computation.terminal_threshold
+      #end if
+      #if $computation.indel_penalty.__str__ != '':
+        --indel-penalty=$computation.indel_penalty
+      #end if
+      #if $computation.indel_endlength.__str__ != '':
+        --indel-endlength=$computation.indel_endlength
+      #end if
+      #if $computation.max_middle_insertions.__str__ != '':
+        --max-middle-insertions=$computation.max_middle_insertions
+      #end if
+      #if $computation.max_middle_deletions.__str__ != '':
+        --max-middle-deletions=$computation.max_middle_deletions
+      #end if
+      #if $computation.max_end_insertions.__str__ != '':
+        --max-end-insertions=$computation.max_end_insertions
+      #end if
+      #if $computation.max_end_deletions.__str__ != '':
+        --max-end-deletions=$computation.max_end_deletions
+      #end if
+      #if $computation.suboptimal_levels.__str__ != '':
+        --suboptimal-levels=$computation.suboptimal_levels
+      #end if
+      #if $computation.adapter_strip.__str__ != '':
+        --adapter-strip=$computation.adapter_strip
+      #end if
+      ## gmap options
+      #if $computation.gmap_mode.__str__ != '' and  $computation.gmap_mode.__str__ != 'None':
+        --gmap-mode='$computation.gmap_mode'
+      #end if
+      #if $computation.trigger_score_for_gmap.__str__ != '':
+        --trigger-score-for-gmap=$computation.trigger_score_for_gmap
+      #end if
+      ## re.search needs a string, so the three tests below match against the string
+      ## form of gmap_mode, exactly as the --gmap-mode test above does.
+      #if $computation.max_gmap_pairsearch.__str__ != '' and $re.search("pairsearch",$computation.gmap_mode.__str__):
+        --max-gmap-pairsearch=$computation.max_gmap_pairsearch
+      #end if
+      #if $computation.max_gmap_terminal.__str__ != '' and $re.search("terminal",$computation.gmap_mode.__str__):
+        --max-gmap-terminal=$computation.max_gmap_terminal
+      #end if
+      #if $computation.max_gmap_improvement.__str__ != '' and $re.search("improv",$computation.gmap_mode.__str__):
+        --max-gmap-improvement=$computation.max_gmap_improvement
+      #end if
+      #if $computation.microexon_spliceprob.__str__ != '':
+        --microexon-spliceprob=$computation.microexon_spliceprob
+      #end if
+    #end if
+    ## Splicing options, emitted only in "advanced" mode.
+    #if $splicing.options == "advanced":
+      $splicing.novelsplicing
+      #if $splicing.localsplicedist.__str__ != '':
+        --localsplicedist=$splicing.localsplicedist
+      #end if
+      #if $splicing.local_splice_penalty.__str__ != '':
+        --local-splice-penalty=$splicing.local_splice_penalty
+      #end if
+      #if $splicing.distant_splice_penalty.__str__ != '':
+        --distant-splice-penalty=$splicing.distant_splice_penalty
+      #end if
+      #if $splicing.local_splice_endlength.__str__ != '':
+        --local-splice-endlength=$splicing.local_splice_endlength
+      #end if
+      #if $splicing.distant_splice_endlength.__str__ != '':
+        --distant-splice-endlength=$splicing.distant_splice_endlength
+      #end if
+      #if $splicing.distant_splice_identity.__str__ != '':
+        --distant-splice-identity=$splicing.distant_splice_identity
+      #end if
+    #end if
+    ## Output options, emitted only in "advanced" mode.
+    #if $output.options == "advanced":
+      #if $output.npath.__str__ != '':
+        --npath=$output.npath
+      #end if
+      $output.quiet_if_excessive
+      $output.show_refdiff
+      $output.clip_overlap
+    #end if
+    ## Result format: SAM or goby specific options; any other format adds nothing here.
+    #if $result.format == "sam":
+      --format=sam
+      $result.no_sam_headers
+      #if $result.read_group_id.__str__.strip != '':
+         --read-group-id='$result.read_group_id'
+      #end if
+      #if $result.read_group_name.__str__ != '':
+         --read-group-name='$result.read_group_name'
+      #end if
+      #if $result.read_group_library.__str__ != '':
+         --read-group-library='$result.read_group_library'
+      #end if
+      #if $result.read_group_platform.__str__ != '':
+         --read-group-platform='$result.read_group_platform'
+      #end if
+      #if $result.quality_shift.__str__ != '':
+        --quality-shift=$result.quality_shift
+      #end if
+    #elif $result.format == "goby":
+      #if $result.goby_output.__str__ != '':
+        --goby-output='$result.goby_output'
+      #end if
+      #if $result.creads_window_start.__str__ != '':
+        --creads-window-start=$result.creads_window_start
+      #end if
+      #if $result.creads_window_end.__str__ != '':
+        --creads-window-end=$result.creads_window_end
+      #end if
+      $result.creads_complement
+    #end if
+    ## TODO - do we need these options (Is it tally XOR runlength?):
+    ## --tallydir=  --use-tally=tally
+    ## --runlengthdir  --use-runlength=runlength
+    ## Input reads: a gsnap FASTA file, or one FASTQ file (two when paired).
+    #if $seq.format == "gsnap_fasta":
+      $seq.circularinput $seq.gsnap
+    #else if $seq.format == "fastq":
+      #if $seq.barcode_length.__str__ != '':
+        --barcode-length=$seq.barcode_length
+      #end if
+      #if $seq.fastq_id_start.__str__ != '':
+        --fastq-id-start=$seq.fastq_id_start
+      #end if
+      #if $seq.fastq_id_end.__str__ != '':
+        --fastq-id-end=$seq.fastq_id_end
+      #end if
+      #if $seq.filter_chastity.__str__ != 'off':
+        --filter-chastity=$seq.filter_chastity
+      #end if
+      #if $seq.paired.ispaired.__str__ == "yes":
+        #if $seq.paired.pairmax_dna.__str__ != '':
+          --pairmax-dna=$seq.paired.pairmax_dna
+        #end if
+        #if $seq.paired.pairmax_rna.__str__ != '':
+          --pairmax-rna=$seq.paired.pairmax_rna
+        #end if
+        $seq.fastq $seq.paired.fastq
+      #else
+        $seq.fastq
+      #end if
+    #end if
+    ## stderr always goes to the gsnap_stderr dataset; stdout is redirected to the
+    ## results dataset only when split_output does not compare equal to True.
+    #if $split_output == True
+      2> $gsnap_stderr
+    #else
+      2> $gsnap_stderr > $results
+    #end if
+
+  
+  
+    
+     
+        
+        
+        
+      
+      
+        
+          
+            
+            
+            
+            
+            
+            
+            
+          
+        
+
+        
+          
+            
+            
+            
+            
+            
+            
+          
+        
+
+        
+          
+            
+            
+            
+            
+            
+            
+          
+        
+
+        
+          
+            
+            
+            
+            
+            
+            
+          
+        
+      
+      
+        
+        
+          
+            
+          
+        
+        
+          
+            
+          
+        
+        
+          
+            
+          
+        
+
+      
+      
+        
+      
+    
+    
+    
+      
+        
+        
+      
+      
+        
+        
+          
+          
+          
+            
+            
+              
+              
+              
+            
+            
+            
+          
+        
+        
+        
+        
+        
+          
+          
+          
+        
+      
+      
+        
+        
+      
+    
+    
+        
+        
+        
+        
+        
+    
+    
+    
+      
+        
+        
+      
+      
+      
+         
+            
+         
+         
+         
+         
+         
+         
+         
+         
+         
+         
+         
+         
+           
+           
+         
+         
+          
+         
+         
+          
+            
+            
+            
+          
+          
+          
+          
+          
+          
+            
+          
+      
+    
+
+    
+      
+        
+        
+      
+      
+      
+         
+          
+         
+         
+         
+         
+         
+         
+         
+         
+         
+      
+    
+
+    
+    
+      
+        
+        
+      
+      
+      
+        
+        
+        
+        
+      
+    
+    
+      
+        
+        
+        
+      
+      
+      
+        
+        
+        
+        
+        
+        
+      
+      
+        
+        
+        
+        
+      
+    
+     
+  
+  
+    
+    
+      (split_output == False)
+      
+        
+      
+    
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+    
+      (split_output == True)
+      
+        
+      
+    
+
+  
+  
+   
+
+  
+
+**What it does**
+
+GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.  
+Publication_ citation: Thomas D. Wu, Serban Nacu "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
+
+.. _GSNAP: http://research-pub.gene.com/gmap/
+.. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
+.. _PMC: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+You will want to read the README_
+
+.. _README: http://research-pub.gene.com/gmap/src/README
+
+------
+
+**Input formats**
+
+Input to GSNAP should be either in FASTQ or FASTA format.  
+
+The FASTQ input may include quality scores, which will then be included in SAM
+output, if that output format is selected. 
+
+For FASTA format, you should include one line per read (or end of a
+paired-end read).  The same FASTA file can have a mixture of
+single-end and paired-end reads of varying lengths, if desired.
+
+Single-end reads:
+
+Each FASTA entry should contain one short read per line, like this::
+
+  >Header information
+  AAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTA
+
+Each short read can have a different length.  However, the entire read
+needs to be on a single line, and may not wrap around multiple lines.
+If it extends to a second line, GSNAP will think that the read is
+paired-end.
+
+
+Paired-end reads:
+
+Each FASTA entry should contain two short reads, one per line, like
+this::
+
+  >Header information
+  AAAACATTCTCCTCCGCATAAGCCTAGTAGATTA
+  GGCGTAGGTAGAAGTAGAGGTTAAGGCGCGTCAG
+
+By default, the program assumes that the second end is in the reverse
+complement direction compared with the first end.  If they are in the
+same direction, you may need to use the --circular-input (or -c) flag.
+
+( The Galaxy tool: "FASTA Width formatter"  can be used to reformat fasta files to have single line sequences. )
+
+------
+
+**Output formats in GSNAP**
+
+SAM output format
+
+Default GSNAP format
+  See the README_
+
+
+
+
+  
+
+
diff -r 000000000000 -r d58d272914e7 gmap/lib/galaxy/datatypes/gmap.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/lib/galaxy/datatypes/gmap.py	Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,169 @@
+"""
+GMAP indexes
+"""
+import logging
+import os,os.path,re
+from data import Text
+from metadata import MetadataElement
+
+log = logging.getLogger(__name__)
+
+class GmapDB( Text ):
+    """
+    A GMAP DB for indexes
+
+    Composite datatype whose index files live under the dataset's
+    extra_files_path; the primary file is a generated HTML summary.
+    """
+    MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
+    MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )
+    MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
+    MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True )
+    MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
+    MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
+    MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )
+    MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )
+    
+    file_ext = 'gmapdb'
+    is_binary = True
+    composite_type = 'auto_primary_file'
+    allow_datatype_change = False
+
+    def generate_primary_file( self, dataset = None ):
+        """ 
+        This is called only at upload to write the html file
+        cannot rename the datasets here - they come with the default unfortunately
+
+        Returns the HTML text of the placeholder primary file.
+        """
+        # NOTE(review): this literal had been split across two lines with its
+        # markup removed, leaving an unterminated string. The visible text is
+        # kept verbatim; the surrounding tags are a reconstruction - confirm
+        # against the upstream file.
+        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'
+    
+    def regenerate_primary_file(self,dataset):
+        """
+        cannot do this until we are setting metadata 
+
+        Rewrites dataset.file_name as an HTML summary of the db name, the
+        cmet/atoi flags and the map names held in dataset.metadata.
+        """
+        bn = dataset.metadata.db_name
+        log.info( "GmapDB regenerate_primary_file %s" % (bn))
+        # NOTE(review): the three HTML literals below were split across lines
+        # with their markup removed. The visible text and the four %s slots
+        # are kept; the tags are a reconstruction - confirm against upstream.
+        rval = ['<html><head><title>GMAPDB %s</title></head><body><h3>GMAPDB %s</h3><p>cmet %s<br/>atoi %s</p><h4>Maps:</h4><ul>' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]
+        for name in dataset.metadata.maps:
+            rval.append( '<li>%s</li>' % name)
+        rval.append( '</ul></body></html>' )
+        f = open(dataset.file_name,'w')
+        try:
+            f.write("\n".join( rval ))
+            f.write('\n')
+        finally:
+            # close the handle even when a write fails
+            f.close()
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """
+        Set dataset.peek and dataset.blurb from the metadata, or to fixed
+        placeholder texts when the underlying dataset has been purged.
+        is_multi_byte is accepted but not used.
+        """
+        log.info( "GmapDB set_peek %s" % (dataset))
+        if not dataset.dataset.purged:
+            dataset.peek  = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
+            dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        """Return dataset.peek, or a generic label if it cannot be read."""
+        try:
+            return dataset.peek
+        except Exception:
+            # best effort: any failure reading the peek falls back to the label
+            return "GMAP index file"
+    def sniff( self, filename ):
+        """Always return False, so this datatype is never chosen by sniffing."""
+        return False
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        """
+        Scan dataset.extra_files_path and fill in the GMAP DB metadata
+        (db_name, basesize, kmers, map_dir, maps, snps, cmet, atoi).
+
+        Expecting:
+        extra_files_path/<db_name>/<db_name>.ref<basesize><kmer>3<index>
+        extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp
+        extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions
+        extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs
+        index maps: 
+        extra_files_path/db_name/db_name.maps/*.iit
+
+        overwrite and kwd are accepted but not used here.
+        """
+        log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path))
+        # Groups: 1 db name, 3 'ref', 4 'met', 5 'a2i', 7 and 8 the two
+        # two-digit fields (read below as basesize and kmer), 10 the optional
+        # suffix after "3positions." (read below as a SNP index name).
+        pat = r'(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?'
+        efp = dataset.extra_files_path
+        # Build new lists and assign them back instead of appending in place:
+        # the list handed out by dataset.metadata may be the shared default
+        # declared in MetadataElement above (TODO confirm), and in-place
+        # appends also duplicated every entry when set_meta ran again.
+        maps = list(dataset.metadata.maps or [''])
+        snps = list(dataset.metadata.snps or [''])
+        flist = os.listdir(efp)
+        for i,fname in enumerate(flist):
+            log.info( "GmapDB set_meta %s %s" % (i,fname))
+            fpath = os.path.join(efp,fname)
+            if os.path.isdir(fpath):
+                ilist = os.listdir(fpath)
+                kmers = {'':'default'} # HACK  '' empty key  added so user has default choice when selecting kmer from metadata
+                for j,iname in enumerate(ilist):
+                    log.info( "GmapDB set_meta file %s %s" % (j,iname))
+                    ipath = os.path.join(fpath,iname)
+                    if os.path.isdir(ipath):  # find maps
+                        dataset.metadata.map_dir = iname
+                        for mapfile in os.listdir(ipath):
+                            mapname = mapfile.replace('.iit','')
+                            log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))
+                            if mapname not in maps:
+                                maps.append(mapname)
+                    else: 
+                        m = re.match(pat,iname)
+                        if m:
+                            log.info( "GmapDB set_meta m %s %s " % (iname, m))
+                            groups = m.groups()
+                            assert len(groups) == 10
+                            dataset.metadata.db_name = fname
+                            if groups[2] == 'ref':
+                                if groups[-1] is not None:
+                                    if groups[-1] not in snps:
+                                        snps.append(groups[-1])
+                                else:
+                                    if groups[-3] is not None:
+                                        k = int(groups[-3])
+                                        kmers[k] = k
+                                    if groups[-4] is not None:
+                                        dataset.metadata.basesize = int( groups[-4])
+                            elif groups[3] == 'met':
+                                dataset.metadata.cmet = True
+                            elif groups[4] == 'a2i':
+                                dataset.metadata.atoi = True
+                dataset.metadata.kmers = list(kmers)
+        dataset.metadata.maps = maps
+        dataset.metadata.snps = snps
+
+##  class IntervalIndexTree( Text ):
+##      """
+##      A GMAP Interval Index Tree Map
+##      created by iit_store
+##      (/path/to/map)/(mapname).iit
+##      """
+##      MetadataElement( name="map_name", desc="The map name for this index set", default='unknown', set_in_upload=True, readonly=False )
+##      file_ext = 'iit'
+##      is_binary = True
+##      composite_type = 'auto_primary_file'
+##      allow_datatype_change = False
+##  
+##  class IntervalAnnotation(data.Text):
+##      """
+##      Class describing a GMAP Interval format:
+##          >label coords optional_tag
+##          optional_annotation (which may be zero, one, or multiple lines)
+##      The coords should be of the form:
+##          chr:position
+##          chr:startposition..endposition
+##      """
+##      file_ext = 'gmapannotation'
+##  
+##  class SpliceSiteAnnotation(IntervalAnnotation):
+##      file_ext = 'gmapsplicesites'
+##      """
+##      Example:
+##          >NM_004448.ERBB2.exon1 17:35110090..35110091 donor 6678
+##          >NM_004448.ERBB2.exon2 17:35116768..35116769 acceptor 6678
+##          >NM_004448.ERBB2.exon2 17:35116920..35116921 donor 1179
+##          >NM_004448.ERBB2.exon3 17:35118099..35118100 acceptor 1179
+##          >NM_004449.ERG.exon1 21:38955452..38955451 donor 783
+##          >NM_004449.ERG.exon2 21:38878740..38878739 acceptor 783
+##          >NM_004449.ERG.exon2 21:38878638..38878637 donor 360
+##          >NM_004449.ERG.exon3 21:38869542..38869541 acceptor 360
+##      """
+##  
+##  class IntronAnnotation(IntervalAnnotation):
+##      file_ext = 'gmapintrons'
+##      """
+##      Example:
+##          >NM_004448.ERBB2.intron1 17:35110090..35116769
+##          >NM_004448.ERBB2.intron2 17:35116920..35118100
+##          >NM_004449.ERG.intron1 21:38955452..38878739
+##          >NM_004449.ERG.intron2 21:38878638..38869541
+##      """
+##  
+##  class SNPAnnotation(IntervalAnnotation):
+##      file_ext = 'gmapsnps'
+##      """
+##      Example:
+##          >rs62211261 21:14379270 CG
+##          >rs62211262 21:14379281 CG
+##      """