# HG changeset patch
# User jjohnson
# Date 1318956162 14400
# Node ID d58d272914e7e7da99c097334899ffd6b60b4ab6
Uploaded
diff -r 000000000000 -r d58d272914e7 gmap/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/README Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,50 @@
+
+GMAP and GSNAP use added datatypes:
+
+ add datatype definition file: lib/galaxy/datatypes/gmap.py
+
+ add the following import line to: lib/galaxy/datatypes/registry.py
+ import gmap # added for gmap tools
+
+ add to datatypes_conf.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (split_output == False)
+
+
+
+
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+GMAP_ (Genomic Mapping and Alignment Program) allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains. It is developed by Thomas D. Wu of Genentech, Inc.
+
+Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
+
+.. _GMAP: http://research-pub.gene.com/gmap/
+.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+You will want to read the README_
+
+.. _README: http://research-pub.gene.com/gmap/src/README
+
+
+
+
diff -r 000000000000 -r d58d272914e7 gmap/gmap_build.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/gmap_build.xml Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,163 @@
+
+ a GMAP DB Index
+
+ gmap_build
+
+ gmap --version
+ /bin/bash $shscript 2>1 1> $output
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#!/bin/bash
+## Cheetah template for the bash script that builds a GMAP database inside the
+## output dataset's extra_files_path and then adds the optional maps and indexes.
+## Character constants: 36 is the dollar sign, 62 is greater-than, 60 is less-than,
+## 38 is the ampersand. None of them is referenced in the script below.
+#set $ds = chr(36)
+#set $gt = chr(62)
+#set $lt = chr(60)
+#set $ad = chr(38)
+#import os.path
+## gmapdb is the database directory; mapsdir is [gmapdb]/[refname]/[refname].maps
+#set $gmapdb = $output.extra_files_path
+#set $mapsdir = $os.path.join($os.path.join($gmapdb,str($refname)), str($refname) + '.maps')
+mkdir -p $gmapdb
+## export GMAPDB required for cmetindex and atoiindex
+export GMAPDB=$gmapdb
+## Run gmap_build once for every entry of the comma-separated kmer value
+#for $k in $kmer.__str__.split(','):
+gmap_build -D $gmapdb -d $refname -s numeric-alpha -k $k $input
+#end for
+## Report the databases found in gmapdb; sed rewrites the "Available ..." line to "gmap db: "
+get-genome -D $gmapdb -d '?' | sed 's/^Available .*/gmap db: /'
+echo "kmers: " $kmer
+## Optional splice site and intron maps, built from the selected annotation source.
+## Each source is skipped when its dataset renders as 'None'.
+#if $splicesite.splice_source == 'refGeneTable':
+#if $splicesite.refGenes.__str__ != 'None':
+cat $splicesite.refGenes | psl_splicesites -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'splicesites')
+cat $splicesite.refGenes | psl_introns -s $splicesite.col_skip | iit_store -o $os.path.join($mapsdir,'introns')
+#end if
+#elif $splicesite.splice_source == 'gtf':
+#if $splicesite.gtfGenes.__str__ != 'None':
+cat $splicesite.gtfGenes | gtf_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
+cat $splicesite.gtfGenes | gtf_introns | iit_store -o $os.path.join($mapsdir,'introns')
+#end if
+#elif $splicesite.splice_source == 'gff3':
+#if $splicesite.gff3Genes.__str__ != 'None':
+cat $splicesite.gff3Genes | gff3_splicesites | iit_store -o $os.path.join($mapsdir,'splicesites')
+cat $splicesite.gff3Genes | gff3_introns | iit_store -o $os.path.join($mapsdir,'introns')
+#end if
+#end if
+## Optional SNP map stored as 'snps'; the -e argument is added only when snpsex is set.
+## snpindex then runs against the stored 'snps' map.
+#if $dbsnp.snp_source == 'snpTable':
+#if $dbsnp.snps.__str__ != 'None':
+#if $dbsnp.snpsex.__str__ != 'None':
+cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight -e $dbsnp.snpsex | iit_store -o $os.path.join($mapsdir,'snps')
+#else:
+cat $dbsnp.snps | dbsnp_iit -w $dbsnp.weight | iit_store -o $os.path.join($mapsdir,'snps')
+#end if
+snpindex -d $refname -v snps
+#end if
+#end if
+## Optional cmet and atoi indexes; both rely on the GMAPDB variable exported above
+#if $cmetindex.__str__ == 'yes':
+cmetindex -d $refname
+echo "cmetindex"
+#end if
+#if $atoiindex.__str__ == 'yes':
+atoiindex -d $refname
+echo "atoiindex"
+#end if
+## Report the maps stored for this database; sed rewrites the "Available maps ..." line to "maps: "
+get-genome -D $gmapdb -d $refname -m '?' | sed 's/^Available maps .*/maps: /'
+
+
+
+
+
+
+
+
+
+**GMAP Build**
+
+GMAP Build creates an index of a genomic sequence for alignments using GMAP_ (Genomic Mapping and Alignment Program for mRNA and EST sequences) and GSNAP_ (Genomic Short-read Nucleotide Alignment Program).
+
+You will want to read the README_
+
+Publication_ citation: Thomas D. Wu, Colin K. Watanabe Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
+
+.. _GMAP: http://research-pub.gene.com/gmap/
+.. _GSNAP: http://research-pub.gene.com/gmap/
+.. _README: http://research-pub.gene.com/gmap/src/README
+.. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
+
+
+
+
+
diff -r 000000000000 -r d58d272914e7 gmap/gsnap.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/gsnap.xml Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,585 @@
+
+ Genomic Short-read Nucleotide Alignment Program
+
+ gsnap
+
+ gsnap --version
+
+ #import os.path, re
+ ## Start of the gsnap command line: fixed thread count, ordered output, then the reference selection.
+ gsnap
+ --nthreads="4" --ordered
+ ## Reference source: a history file passed as --gseg, a gmapdb dataset from the history,
+ ## or otherwise a gmapindex path that is split into directory and database name.
+ #if $refGenomeSource.genomeSource == "history":
+ --gseg=$refGenomeSource.ownFile
+ #elif $refGenomeSource.genomeSource == "gmapdb":
+ ## NOTE(review): the database name is the first entry returned by os.listdir, whose order is arbitrary;
+ ## this assumes extra_files_path holds exactly one entry - confirm.
+ #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
+ --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
+ ## A kmer is passed only when its text is two characters long (presumably a two-digit size - confirm)
+ #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
+ --kmer=$refGenomeSource.kmer
+ #end if
+ ## Map and SNP index names are not two characters long ('splicesites', 'introns', 'snps'),
+ ## so the length test copied from kmer never passed; test for a real selection instead.
+ #if $refGenomeSource.splicemap != None and $refGenomeSource.splicemap.__str__ != '' and $refGenomeSource.splicemap.__str__ != 'None':
+ --use-splices=$refGenomeSource.splicemap
+ #end if
+ #if $refGenomeSource.snpindex != None and $refGenomeSource.snpindex.__str__ != '' and $refGenomeSource.snpindex.__str__ != 'None':
+ --use-snps=$refGenomeSource.snpindex
+ #end if
+ #else:
+ --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
+ #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
+ --kmer=$refGenomeSource.kmer
+ #end if
+ #end if
+ ## The mode flag is omitted when no mode is set
+ #if $mode.__str__ != '':
+ --mode=$mode
+ #end if
+ ## Advanced computation options: each valued option is passed only when its field is non-empty.
+ #if $computation.options == "advanced":
+ #if $computation.max_mismatches.__str__ != '':
+ --max-mismatches=$computation.max_mismatches
+ #end if
+ ## The next two parameters are inserted as-is (presumably each renders a complete flag or nothing - confirm against the parameter definitions)
+ $computation.query_unk_mismatch
+ $computation.genome_unk_mismatch
+ #if $computation.terminal_threshold.__str__ != '':
+ --terminal-threshold=$computation.terminal_threshold
+ #end if
+ #if $computation.indel_penalty.__str__ != '':
+ --indel-penalty=$computation.indel_penalty
+ #end if
+ #if $computation.indel_endlength.__str__ != '':
+ --indel-endlength=$computation.indel_endlength
+ #end if
+ #if $computation.max_middle_insertions.__str__ != '':
+ --max-middle-insertions=$computation.max_middle_insertions
+ #end if
+ #if $computation.max_middle_deletions.__str__ != '':
+ --max-middle-deletions=$computation.max_middle_deletions
+ #end if
+ #if $computation.max_end_insertions.__str__ != '':
+ --max-end-insertions=$computation.max_end_insertions
+ #end if
+ #if $computation.max_end_deletions.__str__ != '':
+ --max-end-deletions=$computation.max_end_deletions
+ #end if
+ #if $computation.suboptimal_levels.__str__ != '':
+ --suboptimal-levels=$computation.suboptimal_levels
+ #end if
+ #if $computation.adapter_strip.__str__ != '':
+ --adapter-strip=$computation.adapter_strip
+ #end if
+ ## gmap options
+ ## gmap_mode is skipped when it renders as an empty string or as 'None'
+ #if $computation.gmap_mode.__str__ != '' and $computation.gmap_mode.__str__ != 'None':
+ --gmap-mode='$computation.gmap_mode'
+ #end if
+ #if $computation.trigger_score_for_gmap.__str__ != '':
+ --trigger-score-for-gmap=$computation.trigger_score_for_gmap
+ #end if
+ ## Each max-gmap limit is passed only when its keyword is found in gmap_mode
+ #if $computation.max_gmap_pairsearch.__str__ != '' and $re.search("pairsearch",$computation.gmap_mode):
+ --max-gmap-pairsearch=$computation.max_gmap_pairsearch
+ #end if
+ #if $computation.max_gmap_terminal.__str__ != '' and $re.search("terminal",$computation.gmap_mode):
+ --max-gmap-terminal=$computation.max_gmap_terminal
+ #end if
+ #if $computation.max_gmap_improvement.__str__ != '' and $re.search("improv",$computation.gmap_mode):
+ --max-gmap-improvement=$computation.max_gmap_improvement
+ #end if
+ #if $computation.microexon_spliceprob.__str__ != '':
+ --microexon-spliceprob=$computation.microexon_spliceprob
+ #end if
+ #end if
+ ## Advanced splicing options: novelsplicing is inserted as-is, the rest only when non-empty.
+ #if $splicing.options == "advanced":
+ $splicing.novelsplicing
+ #if $splicing.localsplicedist.__str__ != '':
+ --localsplicedist=$splicing.localsplicedist
+ #end if
+ #if $splicing.local_splice_penalty.__str__ != '':
+ --local-splice-penalty=$splicing.local_splice_penalty
+ #end if
+ #if $splicing.distant_splice_penalty.__str__ != '':
+ --distant-splice-penalty=$splicing.distant_splice_penalty
+ #end if
+ #if $splicing.local_splice_endlength.__str__ != '':
+ --local-splice-endlength=$splicing.local_splice_endlength
+ #end if
+ #if $splicing.distant_splice_endlength.__str__ != '':
+ --distant-splice-endlength=$splicing.distant_splice_endlength
+ #end if
+ #if $splicing.distant_splice_identity.__str__ != '':
+ --distant-splice-identity=$splicing.distant_splice_identity
+ #end if
+ #end if
+ ## Advanced output options: npath only when non-empty, the remaining three inserted as-is.
+ #if $output.options == "advanced":
+ #if $output.npath.__str__ != '':
+ --npath=$output.npath
+ #end if
+ $output.quiet_if_excessive
+ $output.show_refdiff
+ $output.clip_overlap
+ #end if
+ ## Result format: SAM and Goby each add their own flags; any other format value adds nothing here.
+ #if $result.format == "sam":
+ --format=sam
+ $result.no_sam_headers
+ ## NOTE(review): only read_group_id is tested through .strip; the other read group fields are tested unstripped.
+ #if $result.read_group_id.__str__.strip != '':
+ --read-group-id='$result.read_group_id'
+ #end if
+ #if $result.read_group_name.__str__ != '':
+ --read-group-name='$result.read_group_name'
+ #end if
+ #if $result.read_group_library.__str__ != '':
+ --read-group-library='$result.read_group_library'
+ #end if
+ #if $result.read_group_platform.__str__ != '':
+ --read-group-platform='$result.read_group_platform'
+ #end if
+ #if $result.quality_shift.__str__ != '':
+ --quality-shift=$result.quality_shift
+ #end if
+ #elif $result.format == "goby":
+ #if $result.goby_output.__str__ != '':
+ --goby-output='$result.goby_output'
+ #end if
+ #if $result.creads_window_start.__str__ != '':
+ --creads-window-start=$result.creads_window_start
+ #end if
+ #if $result.creads_window_end.__str__ != '':
+ --creads-window-end=$result.creads_window_end
+ #end if
+ $result.creads_complement
+ #end if
+ ## TODO - do we need these options (Is it tally XOR runlength?):
+ ## --tallydir= --use-tally=tally
+ ## --runlengthdir --use-runlength=runlength
+ ## Input reads: gsnap_fasta passes circularinput plus one file; fastq passes one file, or two when paired.
+ #if $seq.format == "gsnap_fasta":
+ $seq.circularinput $seq.gsnap
+ #else if $seq.format == "fastq":
+ #if $seq.barcode_length.__str__ != '':
+ --barcode-length=$seq.barcode_length
+ #end if
+ #if $seq.fastq_id_start.__str__ != '':
+ --fastq-id-start=$seq.fastq_id_start
+ #end if
+ #if $seq.fastq_id_end.__str__ != '':
+ --fastq-id-end=$seq.fastq_id_end
+ #end if
+ ## filter-chastity is omitted when the value is 'off'
+ #if $seq.filter_chastity.__str__ != 'off':
+ --filter-chastity=$seq.filter_chastity
+ #end if
+ #if $seq.paired.ispaired.__str__ == "yes":
+ #if $seq.paired.pairmax_dna.__str__ != '':
+ --pairmax-dna=$seq.paired.pairmax_dna
+ #end if
+ #if $seq.paired.pairmax_rna.__str__ != '':
+ --pairmax-rna=$seq.paired.pairmax_rna
+ #end if
+ $seq.fastq $seq.paired.fastq
+ #else
+ $seq.fastq
+ #end if
+ #end if
+ ## stderr always goes to gsnap_stderr; stdout is redirected to results only when split_output is not True
+ #if $split_output == True
+ 2> $gsnap_stderr
+ #else
+ 2> $gsnap_stderr > $results
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (split_output == False)
+
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+ (split_output == True)
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.
+
+Publication_ citation: Thomas D. Wu, Serban Nacu. "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10.
+
+.. _GSNAP: http://research-pub.gene.com/gmap/
+.. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
+
+The article is also available at http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed
+
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+You will want to read the README_
+
+.. _README: http://research-pub.gene.com/gmap/src/README
+
+------
+
+**Input formats**
+
+Input to GSNAP should be either in FASTQ or FASTA format.
+
+The FASTQ input may include quality scores, which will then be included in SAM
+output, if that output format is selected.
+
+For FASTA format, you should include one line per read (or end of a
+paired-end read). The same FASTA file can have a mixture of
+single-end and paired-end reads of varying lengths, if desired.
+
+Single-end reads:
+
+Each FASTA entry should contain one short read per line, like this
+
+>Header information
+AAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTA
+
+Each short read can have a different length. However, the entire read
+needs to be on a single line, and may not wrap around multiple lines.
+If it extends to a second line, GSNAP will think that the read is
+paired-end.
+
+
+Paired-end reads:
+
+Each FASTA entry should contain two short reads, one per line, like
+this
+
+>Header information
+AAAACATTCTCCTCCGCATAAGCCTAGTAGATTA
+GGCGTAGGTAGAAGTAGAGGTTAAGGCGCGTCAG
+
+By default, the program assumes that the second end is in the reverse
+complement direction compared with the first end. If they are in the
+same direction, you may need to use the --circular-input (or -c) flag.
+
+( The Galaxy tool: "FASTA Width formatter" can be used to reformat fasta files to have single line sequences. )
+
+------
+
+**Output formats in GSNAP**
+
+SAM output format
+
+Default GSNAP format
+ See the README_
+
+
+
+
+
+
+
diff -r 000000000000 -r d58d272914e7 gmap/lib/galaxy/datatypes/gmap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gmap/lib/galaxy/datatypes/gmap.py Tue Oct 18 12:42:42 2011 -0400
@@ -0,0 +1,169 @@
+"""
+GMAP indexes
+"""
+import logging
+import os,os.path,re
+from data import Text
+from metadata import MetadataElement
+
+log = logging.getLogger(__name__)
+
+class GmapDB( Text ):
+ """
+ A GMAP DB for indexes
+
+ Composite datatype (file_ext 'gmapdb'); set_meta fills the metadata below by
+ scanning the dataset's extra_files_path.
+ """
+ # Metadata describing the index set; every element is declared readonly.
+ MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
+ MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )
+ MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True )
+ MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )
+ MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )
+
+ # Datatype settings: binary composite dataset with an auto-generated primary file; datatype changes are disallowed.
+ file_ext = 'gmapdb'
+ is_binary = True
+ composite_type = 'auto_primary_file'
+ allow_datatype_change = False
+
+ def generate_primary_file( self, dataset = None ):
+ """
+ Return the text of the auto-generated primary (html) file.
+
+ This is called only at upload to write the html file.
+ The datasets cannot be renamed here - they come with their default names.
+ """
+ # NOTE(review): the literal below is split across two lines in this copy of the patch and looks like it lost its HTML markup - restore it from the original file before applying.
+ return '
AutoGenerated Primary File for Composite Dataset'
+
+ def regenerate_primary_file(self,dataset):
+ """
+ Rewrite the dataset's primary file with a summary of the database:
+ the db name, the cmet and atoi flags, and one entry per stored map.
+
+ This cannot be done until metadata is being set.
+ """
+ bn = dataset.metadata.db_name
+ log.info( "GmapDB regenerate_primary_file %s" % (bn))
+ # NOTE(review): the literals below span several lines in this copy of the patch and appear to have lost their HTML markup - restore them from the original file before applying.
+ rval = ['GMAPDB %sGMAPDB %s
cmet %s
atoi %sMaps:
' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]
+ for i,name in enumerate(dataset.metadata.maps):
+ rval.append( '- %s' % name)
+ rval.append( '
' )
+ # file() is the Python 2 builtin; the collected lines are written to the dataset's own file
+ f = file(dataset.file_name,'w')
+ f.write("\n".join( rval ))
+ f.write('\n')
+ f.close()
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ log.info( "GmapDB set_peek %s" % (dataset))
+ if not dataset.dataset.purged:
+ dataset.peek = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
+ dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+ def display_peek( self, dataset ):
+ try:
+ return dataset.peek
+ except:
+ return "GMAP index file"
+ def sniff( self, filename ):
+ return False
+ def set_meta( self, dataset, overwrite = True, **kwd ):
+ """
+ Expecting:
+ extra_files_path//db_name>.ref3
+ extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp
+ extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions
+ extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs
+ index maps:
+ extra_files_path/db_name/db_name.maps/*.iit
+ """
+ log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path))
+ pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?'
+ efp = dataset.extra_files_path
+ flist = os.listdir(efp)
+ for i,fname in enumerate(flist):
+ log.info( "GmapDB set_meta %s %s" % (i,fname))
+ fpath = os.path.join(efp,fname)
+ if os.path.isdir(fpath):
+ ilist = os.listdir(fpath)
+ kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata
+ for j,iname in enumerate(ilist):
+ log.info( "GmapDB set_meta file %s %s" % (j,iname))
+ ipath = os.path.join(fpath,iname)
+ if os.path.isdir(ipath): # find maps
+ dataset.metadata.map_dir = iname
+ for mapfile in os.listdir(ipath):
+ mapname = mapfile.replace('.iit','')
+ log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))
+ dataset.metadata.maps.append(mapname)
+ else:
+ m = re.match(pat,iname)
+ if m:
+ log.info( "GmapDB set_meta m %s %s " % (iname, m))
+ assert len(m.groups()) == 10
+ dataset.metadata.db_name = fname
+ if m.groups()[2] == 'ref':
+ if m.groups()[-1] != None:
+ dataset.metadata.snps.append(m.groups()[-1])
+ else:
+ if m.groups()[-3] != None:
+ k = int(m.groups()[-3])
+ kmers[k] = k
+ if m.groups()[-4] != None:
+ dataset.metadata.basesize = int( m.groups()[-4])
+ elif m.groups()[3] == 'met':
+ dataset.metadata.cmet = True
+ elif m.groups()[4] == 'a2i':
+ dataset.metadata.atoi = True
+ dataset.metadata.kmers = kmers.keys()
+
+## class IntervalIndexTree( Text ):
+## """
+## A GMAP Interval Index Tree Map
+## created by iit_store
+## (/path/to/map)/(mapname).iit
+## """
+## MetadataElement( name="map_name", desc="The map name for this index set", default='unknown', set_in_upload=True, readonly=False )
+## file_ext = 'iit'
+## is_binary = True
+## composite_type = 'auto_primary_file'
+## allow_datatype_change = False
+##
+## class IntervalAnnotation(data.Text):
+## """
+## Class describing a GMAP Interval format:
+## >label coords optional_tag
+## optional_annotation (which may be zero, one, or multiple lines)
+## The coords should be of the form:
+## chr:position
+## chr:startposition..endposition
+## """
+## file_ext = 'gmapannotation'
+##
+## class SpliceSiteAnnotation(IntervalAnnotation):
+## file_ext = 'gmapsplicesites'
+## """
+## Example:
+## >NM_004448.ERBB2.exon1 17:35110090..35110091 donor 6678
+## >NM_004448.ERBB2.exon2 17:35116768..35116769 acceptor 6678
+## >NM_004448.ERBB2.exon2 17:35116920..35116921 donor 1179
+## >NM_004448.ERBB2.exon3 17:35118099..35118100 acceptor 1179
+## >NM_004449.ERG.exon1 21:38955452..38955451 donor 783
+## >NM_004449.ERG.exon2 21:38878740..38878739 acceptor 783
+## >NM_004449.ERG.exon2 21:38878638..38878637 donor 360
+## >NM_004449.ERG.exon3 21:38869542..38869541 acceptor 360
+## """
+##
+## class IntronAnnotation(IntervalAnnotation):
+## file_ext = 'gmapintrons'
+## """
+## Example:
+## >NM_004448.ERBB2.intron1 17:35110090..35116769
+## >NM_004448.ERBB2.intron2 17:35116920..35118100
+## >NM_004449.ERG.intron1 21:38955452..38878739
+## >NM_004449.ERG.intron2 21:38878638..38869541
+## """
+##
+## class SNPAnnotation(IntervalAnnotation):
+## file_ext = 'gmapsnps'
+## """
+## Example:
+## >rs62211261 21:14379270 CG
+## >rs62211262 21:14379281 CG
+## """