Mercurial > repos > miller-lab > genome_diversity
comparison Population.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 2c498d40ecde |
children |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 from OrderedDict import OrderedDict | 3 import OrderedDict |
| 4 import base64 |
| 5 import json |
| 6 import zlib |
| 7 |
| 8 import sys |
4 | 9 |
5 class Individual(object): | 10 class Individual(object): |
6 __slots__ = ['_column', '_name', '_alias'] | 11 __slots__ = ['_column', '_name', '_alias'] |
7 | 12 |
8 def __init__(self, column, name, alias=None): | 13 def __init__(self, column, name, alias=None): |
9 self._column = column | 14 self._column = int(column) |
10 self._name = name | 15 self._name = name |
11 self._alias = alias | 16 self._alias = alias |
12 | 17 |
13 @property | 18 @property |
14 def column(self): | 19 def column(self): |
40 return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias) | 45 return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias) |
41 | 46 |
42 | 47 |
43 class Population(object): | 48 class Population(object): |
44 def __init__(self, name=None): | 49 def __init__(self, name=None): |
45 self._columns = OrderedDict() | 50 self._columns = OrderedDict.OrderedDict() |
46 self._name = name | 51 self._name = name |
47 | 52 |
48 @property | 53 @property |
49 def name(self): | 54 def name(self): |
50 return self._name | 55 return self._name |
85 return self._columns[column] | 90 return self._columns[column] |
86 return None | 91 return None |
87 | 92 |
88 def tag_list(self, delimiter=':'): | 93 def tag_list(self, delimiter=':'): |
89 entries = [] | 94 entries = [] |
90 for column, individual in self._columns.items(): | 95 for column, individual in self._columns.iteritems(): |
91 entry = '{0}{1}{2}'.format(column, delimiter, individual.name) | 96 first_token = individual.name.split()[0] |
| 97 entry = '{0}{1}{2}'.format(column, delimiter, first_token) |
92 entries.append(entry) | 98 entries.append(entry) |
93 return entries | 99 return entries |
94 | 100 |
95 def to_string(self, delimiter=':', separator=' ', replace_names_with=None): | 101 def to_string(self, delimiter=':', separator=' ', replace_names_with=None): |
96 entries = [] | 102 entries = [] |
120 for tag in tag_list: | 126 for tag in tag_list: |
121 column, name = tag.split(':') | 127 column, name = tag.split(':') |
122 individual = Individual(column, name) | 128 individual = Individual(column, name) |
123 self.add_individual(individual) | 129 self.add_individual(individual) |
124 | 130 |
| 131 def from_wrapped_dict(self, wrapped_dict): |
| 132 unwraped_dict = self.unwrap_dict(wrapped_dict) |
| 133 for name, column in unwraped_dict.iteritems(): |
| 134 individual = Individual(column, name) |
| 135 self.add_individual(individual) |
| 136 |
| 137 def unwrap_dict(self, wrapped_dict): |
| 138 decoded_value = self.decode_value(wrapped_dict) |
| 139 decompressed_value = self.decompress_value(decoded_value) |
| 140 def _decode_list(data): |
| 141 rv = [] |
| 142 for item in data: |
| 143 if isinstance(item, unicode): |
| 144 item = item.encode('utf-8') |
| 145 elif isinstance(item, list): |
| 146 item = _decode_list(item) |
| 147 elif isinstance(item, dict): |
| 148 item = _decode_dict(item) |
| 149 rv.append(item) |
| 150 return rv |
| 151 def _decode_dict(data): |
| 152 rv = {} |
| 153 for key, value in data.iteritems(): |
| 154 if isinstance(key, unicode): |
| 155 key = key.encode('utf-8') |
| 156 if isinstance(value, unicode): |
| 157 value = value.encode('utf-8') |
| 158 elif isinstance(value, list): |
| 159 value = _decode_list(value) |
| 160 elif isinstance(value, dict): |
| 161 value = _decode_dict(value) |
| 162 rv[key] = value |
| 163 return rv |
| 164 unwrapped_dict = json.loads(decompressed_value, object_hook=_decode_dict) |
| 165 return unwrapped_dict |
| 166 |
| 167 def decode_value(self, value): |
| 168 try: |
| 169 return base64.b64decode(value) |
| 170 except TypeError, message: |
| 171 print >> sys.stderr, 'base64.b64decode: {0}: {1}'.format(message, value) |
| 172 sys.exit(1) |
| 173 |
| 174 def decompress_value(self, value): |
| 175 try: |
| 176 return zlib.decompress(value) |
| 177 except zlib.error, message: |
| 178 print >> sys.stderr, 'zlib.decompress: {0}'.format(message) |
| 179 sys.exit(1) |
| 180 |
125 def individual_names(self): | 181 def individual_names(self): |
126 for column, individual in self._columns.items(): | 182 for column, individual in self._columns.items(): |
127 yield individual.name | 183 first_token = individual.name.split()[0] |
| 184 yield first_token |
128 | 185 |