Mercurial > repos > dcouvin > resfinder4
comparison resfinder/cge/output/table.py @ 0:55051a9bc58d draft default tip
Uploaded
author | dcouvin |
---|---|
date | Mon, 10 Jan 2022 20:06:07 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:55051a9bc58d |
---|---|
1 #!/usr/bin/env python3 | |
2 from .orderedset import OrderedSet | |
3 from .exceptions import DuplicateKeyError | |
4 from .exceptions import LockedObjectError | |
5 | |
6 | |
class TableResults(dict):
    """
    Collection of named tables together with metadata about the run that
    produced them.

    The hit table is created empty.

    Attributes set by the constructor:
        software, version, run_date, run_cmd, id: Stored as given.
        long, medium, short: Dicts mapping a table name to a table object.
                             Only 'long' is written to by the methods of
                             this class.
        databases: Dict mapping a database id to its version.
    """
    def __init__(self, software, version, run_date, run_cmd, id):
        super(TableResults, self).__init__()

        self.software = software
        self.version = version
        self.run_date = run_date
        self.run_cmd = run_cmd
        self.id = id

        self.long = {}
        self.medium = {}
        self.short = {}
        self.databases = {}

    def add_database(self, id, version):
        """
        Store 'version' for the database 'id'. An existing entry with the
        same id is overwritten.
        """
        self.databases[id] = version

    def add_table(self, table_name, copy=False):
        """
        Add a table to 'self.long'.

        table_name: Either a Table object, which is stored (by reference)
                    under its own name, or a name, in which case a new
                    empty Table with that name is created.
        copy: Not implemented. If true and a Table object is given,
              NotImplementedError is raised.

        Raises DuplicateKeyError if a table with the same name is already
        stored in 'self.long'.

        TODO: Implement copy argument
        """
        is_table = isinstance(table_name, Table)
        name = table_name.name if is_table else table_name

        # Membership test rather than truthiness: Table is a dict subclass,
        # so an already stored table without rows is falsy and would
        # otherwise be silently replaced.
        if(name in self.long):
            raise DuplicateKeyError("All table names need to be unique. An "
                                    "attempt was made to add a table with a "
                                    "name that already exists. The name is:{}."
                                    .format(name))

        if(not is_table):
            self.long[name] = Table(name)
            return

        if(copy):
            raise NotImplementedError()
        self.long[name] = table_name

    def as_txt(self, type, header=True, sep="\t", sort=True, pre_name=""):
        """
        Return all tables of the given type as one string.

        type: One of 'long', 'medium' or 'short'.
        header, sep, sort: Passed on to the as_txt method of each table.
        pre_name: Text written directly in front of each table name.

        Each table contributes '<pre_name><table name>', a newline, the
        table text and another newline.

        Raises ValueError if 'type' is not one of the three known types.
        """
        if(type == "long"):
            tr = self.long
        elif(type == "medium"):
            tr = self.medium
        elif(type == "short"):
            tr = self.short
        else:
            raise ValueError("as_txt method in TableResults class must be "
                             "called with either 'long', 'medium', or "
                             "'short'. It was called with {}".format(type))

        chunks = []
        for table_name, table in tr.items():
            table_str = table.as_txt(header=header, sep=sep, sort=sort)
            chunks.append("{pre}{name}\n{table}\n"
                          .format(pre=pre_name, name=table_name,
                                  table=table_str))

        return "".join(chunks)

    def merge(self, tr2, auto_row_header=False, copy=False):
        """
        Merges another table result object into the calling table results
        instance.

        Table objects with identical names will be merged, as will headers
        within merged tables. Tables that only exist in the other object
        are stored in the calling instance by reference.

        tr2: The table results object to merge into this one.
        auto_row_header: If True, a row header of the other table that
                         already exists in the calling table is replaced
                         by an unused integer taken from the calling
                         table's row header counter. If False, such a
                         collision raises DuplicateKeyError.
        copy: Not implemented. Raises NotImplementedError if True.

        IMPORTANT: Currently ONLY merges the tables stored in 'self.long'
        IMPORTANT: Nothing is copied. Tables and rows of the other object
                   are referenced, thus a change in a table that got
                   merged can also show up in the merged table.
        TODO: Implement the copy argument to create a deepcopy of the
              merged table instead of just referencing it.
        """
        if(copy is True):
            raise NotImplementedError()

        for table_name in tr2.long:
            other_tbl = tr2.long[table_name]

            # Unknown table: reference the other table directly.
            if(table_name not in self.long):
                self.long[table_name] = other_tbl
                continue

            call_tbl = self.long[table_name]

            for row_header in other_tbl:
                # Get row now as row header might change if a row header
                # collision occurs.
                other_row_dict = other_tbl[row_header]

                # Handle row header collisions
                if(row_header in call_tbl):
                    if(auto_row_header is False):
                        raise DuplicateKeyError(
                            "During merge of two table, row headers were "
                            "found not to be unique across the two tables. "
                            "Row headers needs to be renamed or the merge "
                            "method should be called with "
                            "auto_row_header=True. Note auto row headers will "
                            "become incrementing integers.\n"
                            "Calling table: {}\n"
                            "Other table: {}\n"
                            "Row header: {}\n"
                            .format(call_tbl.name, other_tbl.name, row_header))
                    # Advance the counter until the generated header is
                    # unused, otherwise the merged row would end up inside
                    # an unrelated existing row.
                    while(row_header in call_tbl):
                        call_tbl._row_header_count += 1
                        row_header = call_tbl._row_header_count

                call_tbl[row_header] = other_row_dict
127 | |
128 | |
class Row(dict):
    """
    A single table row: a dict mapping column header to cell value.

    A Row keeps a reference to the table it belongs to. Setting a cell
    with a column header the table does not know yet registers that
    header in the table.
    """
    def __init__(self, table):
        """
        table: The owning table. Its '_headers', 'lock_headers', 'name'
               and 'add_header' members are used when cells are set.
        """
        self.table = table
        super(Row, self).__init__()

    def __setitem__(self, col_header, col_val):
        """
        Set the cell 'col_header' to 'col_val'.

        Raises LockedObjectError if the column header is unknown to the
        table and the headers of the table are locked.
        """
        if(col_header not in self.table._headers):
            if(self.table.lock_headers):
                # A Row does not know its own row header, so only the
                # table name, the offending header and the value are
                # reported.
                raise LockedObjectError("Attempt to add a header via a Row "
                                        "insertion to a table, where the "
                                        "headers has been locked.\n"
                                        "Table: {}\n"
                                        "Header, val: {}, {}\n"
                                        .format(self.table.name, col_header,
                                                col_val))
            self.table.add_header(col_header)

        super(Row, self).__setitem__(col_header, col_val)
150 | |
151 | |
class Table(dict):
    """
    Keys of a Table object is considered 'row headers'.
    The Key can be None in which case the row header will become an
    increasing integer taken from an internal counter (the counter starts
    at 0).

    Values are assigned either as a dict mapping column headers to values,
    or as a list/tuple of length 2, where the first entry is the name of
    a column header and the second entry is the value. Assigned values are
    stored in a Row object per row header; assigning to an existing row
    header adds to / overwrites cells of the existing row.

    Iterating a Table yields the row headers in the order of the
    internal sort list, not in dict insertion order.
    NOTE(review): the Table is its own iterator with a single shared
    cursor, so nested iteration over the same Table is not supported.
    """
    NULL_VAL = "NA"

    def __init__(self, name):
        self.name = name
        self.lock_headers = False

        self._headers = OrderedSet()
        self._row_header_count = 0
        self._sort_key = "row_header"
        self._sort_list = SortList(container=self)
        self._iter_count = 0

    def add_header(self, header, exist_ok=True):
        """
        Register a column header.

        Raises LockedObjectError if the headers are locked, and
        DuplicateKeyError if the header exists and exist_ok is False.
        """
        if(self.lock_headers):
            raise LockedObjectError("Attempt to add a header to an object "
                                    "where the headers has been locked.\n"
                                    "Table: {}\n"
                                    "Header: {}\n".format(self.name, header))

        if(exist_ok is False and header in self._headers):
            raise DuplicateKeyError("Attempt to add header that already exists"
                                    " to table, but the add_header method was "
                                    "called with exist_ok=False.\n"
                                    "Table: {}\n"
                                    "Header: {}\n".format(self.name, header))
        self._headers.add(header)

    def add_headers(self, headers, exist_ok=True):
        """ Register every header in 'headers' via add_header. """
        for header in headers:
            self.add_header(header, exist_ok)

    def as_txt(self, header=True, sep="\t", sort=True):
        """
        Return the table as text, one line per row, cells separated by
        'sep'. Each row line starts with the row header.

        header: If true, the first line holds the column headers,
                preceded by one 'sep' (the empty row header cell).
        sort: If true, the sort list is refreshed and sorted on the
              current sort key before the rows are written.
        """
        if(sort):
            self.set_sort_key(self._sort_key)

        lines = []
        if(header):
            # str() so that non-string column headers can be written too.
            heads = sep.join(str(head) for head in self._headers)
            lines.append("{sep}{heads}".format(sep=sep, heads=heads))

        for row in self:
            lines.append(sep.join(self.get_row_as_list(row, as_txt=True)))

        return "".join("{}\n".format(line) for line in lines)

    def extract_column(self, col):
        """
        Return the values of the column 'col' as a list. Missing cells
        become Table.NULL_VAL. The special name 'row_header' returns the
        row headers.

        Raises KeyError if 'col' is not a known column header.

        NOTE(review): 'row_header' is returned in dict key order while
        every other column follows the sort list order. Confirm whether
        callers rely on that before aligning the two.
        """
        if(col == "row_header"):
            return list(self.keys())
        if(col not in self._headers):
            raise KeyError("Attempt to extract a column that doesn't exist.\n"
                           "Missing column name: {}\n"
                           "Table name: {}".format(col, self.name))
        return [self[entry._key].get(col, Table.NULL_VAL)
                for entry in self._sort_list]

    def get_headers(self):
        """ Returns an OrderedSet """
        return self._headers

    def get_row_as_list(self, row_header, as_txt=False):
        """
        Return a row as a list: the row header followed by one value per
        column header. Missing cells become Table.NULL_VAL.

        as_txt: If true, every entry is converted with str().

        Raises KeyError if the row header does not exist.
        """
        if(row_header not in self):
            raise KeyError("Unable to find the row_header: {}\n"
                           "In Table object named: {}"
                           .format(row_header, self.name))

        row = self[row_header]
        row_list = [row_header]
        row_list.extend(row.get(col_header, Table.NULL_VAL)
                        for col_header in self._headers)

        if(as_txt):
            return [str(cell) for cell in row_list]
        return row_list

    def get_sort_list(self):
        """
        Return the sort list. Raises TypeError if the sort key is None.
        """
        if(self._sort_key is None):
            raise TypeError("The sort key is None. The sort key needs to be "
                            "set before calling the get_sort_list method.\n"
                            "Table: {}".format(self.name))
        return self._sort_list

    def rename_row(self, old_name, new_name):
        """
        Move the row stored under 'old_name' to 'new_name'. The old entry
        is removed from the table and from the sort list, and the row is
        assigned again under the new row header.
        """
        row_dict = self[old_name]
        del(self[old_name])
        self._sort_list.remove(old_name)
        self[new_name] = row_dict

    def set_sort_key(self, key, sort=True):
        """
        Set the column to sort on and refresh the value of every sort
        list entry accordingly.

        key: A column header, or 'row_header' to sort on the row headers.
        sort: If True, the sort list is sorted afterwards.

        Raises KeyError if 'key' is neither a header nor 'row_header'.
        """
        if(key not in self._headers and key != "row_header"):
            raise KeyError("Attempt to set sort_key to a value that is not a "
                           "header.\n"
                           "Key: {}\n"
                           "Available headers: {}\n"
                           "Table name: {}"
                           .format(key, self._headers, self.name))

        self._sort_key = key

        # Entries are updated in place.
        for entry in self._sort_list:
            if(key == "row_header"):
                entry._val = entry._key
            else:
                entry._val = self[entry._key].get(key, Table.NULL_VAL)

        if(sort is True):
            self._sort_list.sort()

    def set_sort_list(self, list):
        """
        Replace the sort list of the table with 'list'.

        Raises TypeError if the sort key is None.
        """
        if(self._sort_key is None):
            raise TypeError("The sort key is None. The sort key needs to be "
                            "set before calling the set_sort_list method.\n"
                            "Table: {}".format(self.name))
        self._sort_list = list

    def __iter__(self):
        """
        Reset the iteration cursor and return the table itself as the
        iterator.
        """
        self._iter_count = 0
        return self

    def __next__(self):
        """
        Return the row header of the next sort list entry.
        """
        if(self._iter_count >= len(self)):
            raise StopIteration
        entry = self._sort_list[self._iter_count]
        self._iter_count += 1
        return entry._key

    def __setitem__(self, row_header=None, col_val=None):
        """
        Add cells to the row 'row_header', creating the row if needed.

        row_header: Key of the row. None is replaced by the current value
                    of the internal row header counter, which is then
                    incremented.
        col_val: Either a dict of column header -> value, or a list/tuple
                 of exactly two entries: column header and value.

        A row that is new to the table is also registered in the sort
        list, using the row header or the value of the sort key column
        (Table.NULL_VAL if the row has no such cell).

        Raises TypeError if col_val is None or of an unsupported type,
        and ValueError if a list/tuple does not hold exactly two entries.
        """
        if(col_val is None):
            raise TypeError("Column key/value argument must be a dict, list or"
                            " tuple. Value was None.\n"
                            "Table: {}".format(self.name))

        # Validate the input before any state is touched, so that a
        # rejected assignment leaves counter and sort list unchanged.
        if(isinstance(col_val, dict)):
            # Snapshot, as col_val may be the very row that gets written.
            cells = list(col_val.items())
        elif(isinstance(col_val, (list, tuple))):
            if(len(col_val) != 2):
                raise ValueError("Unexpected number of values. Expected 2. "
                                 "Got: {}\n"
                                 "Input: {}\n"
                                 "Table: {}".format(len(col_val), col_val,
                                                    self.name))
            cells = [(col_val[0], col_val[1])]
        else:
            raise TypeError("Unexpected type. The value must be of the type "
                            "Row, list, or tuple.\n"
                            "Recieved type: {}\n"
                            "Table: {}\n".format(type(col_val), self.name))

        if(row_header is None):
            row_header = self._row_header_count
            self._row_header_count += 1

        # Decide on membership, not on emptiness: an existing row without
        # cells is already part of the sort list.
        is_new_row = row_header not in self
        if(is_new_row):
            row_dict = Row(table=self)
        else:
            row_dict = self.get(row_header)

        for col_name, val in cells:
            if(col_name not in self._headers):
                self.add_header(col_name)
            row_dict[col_name] = val

        # Register new rows in the sort list, whichever input form was
        # used, so that iteration always finds an entry per row.
        if(is_new_row):
            if(self._sort_key == "row_header"):
                sort_val = row_header
            else:
                sort_val = row_dict.get(self._sort_key, Table.NULL_VAL)
            self._sort_list.append(row_header, sort_val, ignore_len=True)

        super(Table, self).__setitem__(row_header, row_dict)
375 | |
376 | |
class ColumnHeader():
    """
    A column header together with a running index. The index is taken
    from a counter stored on the table as '_col_header_count', so headers
    created later for the same table get a higher index.
    """
    def __init__(self, table, header):
        """
        table: Object the counter '_col_header_count' is stored on. The
               counter is created (starting at 0) if the object does not
               have one yet.
        header: The header value to store.
        """
        index = getattr(table, "_col_header_count", 0) + 1
        table._col_header_count = index
        self._header = header
        self._index = index

    def __cmp__(self, other):
        """
        Three-way comparison on the index: returns -1, 0 or 1.

        Python 3 never calls this method itself; it is kept for code that
        calls it explicitly. Raises TypeError if 'other' is not a
        ColumnHeader.
        """
        if(not isinstance(other, ColumnHeader)):
            raise TypeError("A ColumnHeader object cannot be compared to a "
                            "non-ColumnHeader object.\n"
                            "ColumnHeader _header: {} _index: {}\n"
                            "Other object was of type: {}"
                            .format(self._header, self._index, type(other)))
        return (self._index > other._index) - (self._index < other._index)

    def __lt__(self, other):
        """
        Order ColumnHeader objects by index, which is what sorting uses.
        """
        if(not isinstance(other, ColumnHeader)):
            return NotImplemented
        return self._index < other._index
393 | |
394 | |
class SortList(list):
    """
    Used together with the Table class, as a way to extract rows/data in a
    specific order defined by objects of the type SortList.

    Programmers are meant to sort this list in the desired order and store
    it in a Table class object which will then output rows/data in the
    order defined by the list.

    Implementations details:
    The SortList object contain SortListEntries containing key variables
    with unique values which define the order. And value variables which
    is the 'public' values used to sort on.
    """
    def __init__(self, container=None, unique_list=None, val_list=None):
        """
        container: Optional object whose length is checked in append.
        unique_list, val_list: Optional lists of keys and of the values
                               to sort on. Either both or none of them
                               must be given, and they must have the same
                               length.

        Raises TypeError if only one of the two lists is given and
        IndexError if their lengths differ.
        """
        super(SortList, self).__init__()
        self.container = container

        if(unique_list is None and val_list is None):
            return
        if(unique_list is None or val_list is None):
            raise TypeError("A SortList object must be initiated with both "
                            "unique_list and val_list, or with none of them.")
        if(len(unique_list) != len(val_list)):
            raise IndexError("A SortList object was initiated with two lists "
                             "of different sizes.\n"
                             "len(unique_list) {} != {} len(val_list)"
                             .format(len(unique_list), len(val_list)))

        for key, val in zip(unique_list, val_list):
            self.append(key, val)

    def append(self, key, val, ignore_len=False):
        """
        Append a SortListEntry built from 'key' and 'val'.

        Unless ignore_len is true, and if a container is set, IndexError
        is raised when the length of the container differs from the
        length this list gets by the append.
        """
        entry = SortListEntry(key, val)
        if(self.container is not None and ignore_len is False):
            if(len(self.container) != (len(self) + 1)):
                raise IndexError("Attempt to add an entry to SortList which "
                                 "would cause the list to be longer than the "
                                 "affiliated SortList container.")
        return super(SortList, self).append(entry)

    def remove(self, id):
        """
        Remove every entry whose key equals 'id'. Nothing happens if no
        entry has that key.
        """
        # Rebuild in place instead of deleting while iterating, which
        # skips the entry following each deleted one.
        self[:] = [entry for entry in self if not (id == entry._key)]

    def __repr__(self):
        """ One line per entry: key and value separated by a tab. """
        return "\n".join("{}\t{}".format(entry._key, entry._val)
                         for entry in self)
446 | |
447 | |
class SortListEntry():
    """
    Entry of a SortList: a key plus the value to sort on.

    All comparisons are done on the lower-cased string representation of
    the values, i.e. case-insensitive and as text (so "10" sorts before
    "9"). As __eq__ is defined without __hash__, entries are not hashable.
    """
    def __init__(self, key, val):
        self._key = key
        self._val = val

    def _sort_vals(self, other):
        """
        Return the pair of strings to compare, or None if 'other' has no
        '_val' attribute and thus cannot be compared to an entry.
        """
        try:
            other_val = other._val
        except AttributeError:
            return None
        return str(self._val).lower(), str(other_val).lower()

    def __cmp__(self, other):
        """
        Three-way comparison: returns -1, 0 or 1.

        Python 3 never calls this method itself; it is kept for code that
        calls it explicitly. Raises TypeError if 'other' is not a
        SortListEntry.
        """
        if(not isinstance(other, SortListEntry)):
            raise TypeError("A SortListEntry object cannot be compared to a "
                            "non-SortListEntry object.\n"
                            "SortListEntry key: {} val: {}\n"
                            "Other object was of type: {}"
                            .format(self._key, self._val, type(other)))

        sort_val, sort_val_other = self._sort_vals(other)
        return (sort_val > sort_val_other) - (sort_val < sort_val_other)

    def __repr__(self):
        return "{},{}".format(self._key, self._val)

    # The rich comparisons return NotImplemented for objects without a
    # '_val' attribute, which lets Python apply its default handling
    # instead of failing with an AttributeError.

    def __lt__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] < vals[1]

    def __le__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] <= vals[1]

    def __gt__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] > vals[1]

    def __ge__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] >= vals[1]

    def __eq__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] == vals[1]

    def __ne__(self, other):
        vals = self._sort_vals(other)
        if(vals is None):
            return NotImplemented
        return vals[0] != vals[1]