comparison Population.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 2c498d40ecde
children
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 from OrderedDict import OrderedDict 3 import OrderedDict
4 import base64
5 import json
6 import zlib
7
8 import sys
4 9
5 class Individual(object): 10 class Individual(object):
6 __slots__ = ['_column', '_name', '_alias'] 11 __slots__ = ['_column', '_name', '_alias']
7 12
def __init__(self, column, name, alias=None):
    """Store the individual's column index, name and optional alias.

    ``column`` is coerced with ``int()``, so a numeric string is accepted
    and a non-numeric value raises ``ValueError`` before anything is stored.
    """
    self._column, self._name, self._alias = int(column), name, alias
12 17
13 @property 18 @property
14 def column(self): 19 def column(self):
40 return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias) 45 return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)
41 46
42 47
43 class Population(object): 48 class Population(object):
def __init__(self, name=None):
    """Create an empty population with an optional name.

    Individuals are kept in an insertion-ordered mapping provided by the
    project's ``OrderedDict`` module.
    """
    ordered_columns = OrderedDict.OrderedDict()
    self._columns = ordered_columns
    self._name = name
47 52
@property
def name(self):
    """Return the name given to this population (``None`` if unset)."""
    population_name = self._name
    return population_name
85 return self._columns[column] 90 return self._columns[column]
86 return None 91 return None
87 92
def tag_list(self, delimiter=':'):
    """Return one ``<column><delimiter><name>`` tag per individual.

    Only the first whitespace-separated token of each individual's name is
    used. A name that is empty or consists solely of whitespace yields an
    empty token instead of raising ``IndexError``.

    :param delimiter: text placed between the column and the name token.
    :returns: list of tag strings, in the iteration order of the columns.
    """
    entries = []
    # .items() (rather than the Python-2-only .iteritems()) matches
    # individual_names() and works on any dict-like mapping.
    for column, individual in self._columns.items():
        tokens = individual.name.split()
        first_token = tokens[0] if tokens else ''
        entries.append('{0}{1}{2}'.format(column, delimiter, first_token))
    return entries
94 100
95 def to_string(self, delimiter=':', separator=' ', replace_names_with=None): 101 def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
96 entries = [] 102 entries = []
120 for tag in tag_list: 126 for tag in tag_list:
121 column, name = tag.split(':') 127 column, name = tag.split(':')
122 individual = Individual(column, name) 128 individual = Individual(column, name)
123 self.add_individual(individual) 129 self.add_individual(individual)
124 130
def from_wrapped_dict(self, wrapped_dict):
    """Add one individual per entry of a wrapped name-to-column mapping.

    ``wrapped_dict`` is unpacked with ``unwrap_dict``; each resulting
    ``name: column`` pair becomes an ``Individual(column, name)`` that is
    registered through ``add_individual``.
    """
    unwrapped_dict = self.unwrap_dict(wrapped_dict)
    # .items() keeps this consistent with individual_names() and avoids the
    # Python-2-only .iteritems().
    for name, column in unwrapped_dict.items():
        self.add_individual(Individual(column, name))
136
def unwrap_dict(self, wrapped_dict):
    """Decode, decompress and JSON-parse a wrapped dictionary.

    The value is passed through ``decode_value`` and ``decompress_value``
    and the result is parsed as JSON. On Python 2 every ``unicode`` key and
    value (including those nested in lists and dicts) is converted to a
    UTF-8 encoded ``str``; on Python 3 the parsed strings are already
    native ``str`` and are left untouched.

    :param wrapped_dict: the encoded payload.
    :returns: the parsed JSON value.
    """
    decoded_value = self.decode_value(wrapped_dict)
    decompressed_value = self.decompress_value(decoded_value)

    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = None  # Python 3: nothing to convert

    def _to_native(item):
        # Recursively turn Python 2 unicode objects into UTF-8 byte strings.
        if text_type is not None and isinstance(item, text_type):
            return item.encode('utf-8')
        if isinstance(item, list):
            return [_to_native(element) for element in item]
        if isinstance(item, dict):
            return dict(
                (_to_native(key), _to_native(value))
                for key, value in item.items()
            )
        return item

    # zlib hands back bytes; decode explicitly so json.loads receives text
    # on every interpreter version.
    if isinstance(decompressed_value, bytes):
        decompressed_value = decompressed_value.decode('utf-8')
    return json.loads(decompressed_value, object_hook=_to_native)
166
def decode_value(self, value):
    """Return the base64-decoded form of ``value``.

    If decoding fails, a diagnostic containing the error and the offending
    value is written to standard error and the process exits with status 1.
    """
    try:
        return base64.b64decode(value)
    # Python 2 reports bad input as TypeError; Python 3 raises
    # binascii.Error, which is a ValueError subclass.
    except (TypeError, ValueError) as message:
        sys.stderr.write('base64.b64decode: {0}: {1}\n'.format(message, value))
        sys.exit(1)
173
def decompress_value(self, value):
    """Return the zlib-decompressed form of ``value``.

    If decompression fails, the zlib error is written to standard error and
    the process exits with status 1.
    """
    try:
        return zlib.decompress(value)
    except zlib.error as message:
        sys.stderr.write('zlib.decompress: {0}\n'.format(message))
        sys.exit(1)
180
def individual_names(self):
    """Yield the first whitespace-separated token of each individual's name.

    Names are produced in the iteration order of the column mapping. A name
    that is empty or only whitespace yields an empty string instead of
    raising ``IndexError``.
    """
    for _column, individual in self._columns.items():
        tokens = individual.name.split()
        yield tokens[0] if tokens else ''
128 185