annotate lib/galaxy/datatypes/snpeff.py @ 0:e8adfc4c0a6b draft

Uploaded
author iuc
date Wed, 11 Dec 2013 08:53:32 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
1 """
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
2 SnpEff datatypes
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
3 """
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
4 import os,os.path,re,sys
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
5 import galaxy.datatypes.data
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
6 from galaxy.datatypes.data import Text
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
7 from galaxy.datatypes.metadata import MetadataElement
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
8
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
class SnpEffDb( Text ):
    """Class describing a SnpEff genome database.

    The database files live in the dataset's extra files directory; the
    metadata elements below are filled in from the file names found there
    (see ``set_meta``).
    """
    file_ext = "snpeffdb"
    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[] )
    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )

    def __init__( self, **kwd ):
        """Initialize the datatype; all keyword arguments go to ``Text``."""
        Text.__init__( self, **kwd )

    def set_meta( self, dataset, **kwd ):
        """Set metadata by scanning ``dataset.extra_files_path``.

        * ``genome_version`` is the name of the directory that holds a file
          whose name starts with ``snpEffectPredictor``.
        * ``regulation`` collects the ``<name>`` part of every
          ``regulation_<name>.bin`` file.
        * ``annotation`` collects the flag name (without the leading dash)
          of every known optional annotation file that is present.

        Nothing beyond ``Text.set_meta`` is done when the extra files
        directory is unset or does not exist.
        """
        Text.set_meta( self, dataset, **kwd )
        data_dir = dataset.extra_files_path
        # Escape the dot and anchor the end so that only real
        # "regulation_<name>.bin" files match (not e.g. "regulation_Xbin"
        # or "regulation_X.bin.tmp").  Compiled once, outside the loops.
        regulation_re = re.compile( r'regulation_(.+)\.bin$' )
        # annotation files that are included in snpEff by a flag
        annotations_dict = { 'nextProt.bin': '-nextprot', 'motif.bin': '-motif' }
        regulations = []
        annotations = []
        if data_dir and os.path.isdir( data_dir ):
            # search data_dir/genome_version for files
            for root, _dirs, files in os.walk( data_dir ):
                for fname in files:
                    if fname.startswith( 'snpEffectPredictor' ):
                        # if snpEffectPredictor.bin download succeeded
                        dataset.metadata.genome_version = os.path.basename( root )
                        continue
                    m = regulation_re.match( fname )
                    if m:
                        regulations.append( m.group( 1 ) )
                    elif fname in annotations_dict:
                        # store the flag name without its leading dash(es)
                        annotations.append( annotations_dict[ fname ].lstrip( '-' ) )
            dataset.metadata.regulation = regulations
            dataset.metadata.annotation = annotations
e8adfc4c0a6b Uploaded
iuc
parents:
diff changeset
46