comparison resfinder/cge/output/table.py @ 0:55051a9bc58d draft default tip

Uploaded
author dcouvin
date Mon, 10 Jan 2022 20:06:07 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:55051a9bc58d
1 #!/usr/bin/env python3
2 from .orderedset import OrderedSet
3 from .exceptions import DuplicateKeyError
4 from .exceptions import LockedObjectError
5
6
class TableResults(dict):
    """
    Collection of named Table objects produced by one run of a tool.

    The hit table is created empty. Tables are kept in three dictionaries
    ('long', 'medium' and 'short') keyed by table name. Only 'long' is
    populated by the methods of this class (see add_table and merge).
    Database versions are stored in 'databases'.
    """
    def __init__(self, software, version, run_date, run_cmd, id):
        super(TableResults, self).__init__()

        self.software = software
        self.version = version
        self.run_date = run_date
        self.run_cmd = run_cmd
        self.id = id

        self.long = {}
        self.medium = {}
        self.short = {}
        self.databases = {}

    def add_database(self, id, version):
        """
        Store the version of the database named 'id'. An existing entry
        with the same id is overwritten.
        """
        self.databases[id] = version

    def add_table(self, table_name, copy=False):
        """
        Add a table to 'self.long'.

        table_name: Either a Table object, which is stored as is (by
                    reference), or a name, in which case a new empty
                    Table with that name is created.
        copy:       Not implemented. Passing a true value together with a
                    Table object raises NotImplementedError.

        Raises DuplicateKeyError if a table with the same name has already
        been added.

        TODO: Implement copy argument
        """
        is_table = isinstance(table_name, Table)
        name = table_name.name if is_table else table_name

        # Test membership and not truthiness: a Table is a dict, so a table
        # that exists but holds no rows yet is falsy and would otherwise
        # be silently replaced.
        if name in self.long:
            raise DuplicateKeyError("All table names need to be unique. An "
                                    "attempt was made to add a table with a "
                                    "name that already exists. The name is:{}."
                                    .format(name))

        if not is_table:
            self.long[name] = Table(name)
            return

        if copy:
            raise NotImplementedError()
        self.long[name] = table_name

    def as_txt(self, type, header=True, sep="\t", sort=True, pre_name=""):
        """
        Return all tables of the requested kind as one string.

        type:     'long', 'medium' or 'short'. Selects which dictionary of
                  tables is written.
        header:   Passed on to the as_txt method of each table.
        sep:      Passed on to the as_txt method of each table.
        sort:     Passed on to the as_txt method of each table.
        pre_name: Text written immediately in front of each table name.

        Each table is written as its name on a line of its own, followed
        by the table text and a newline.

        Raises ValueError if 'type' is not one of the three known kinds.
        """
        tables_by_type = {
            "long": self.long,
            "medium": self.medium,
            "short": self.short,
        }
        if type not in tables_by_type:
            raise ValueError("as_txt method in TableResults class must be "
                             "called with either 'long', 'medium', or "
                             "'short'. It was called with {}".format(type))

        chunks = []
        for table_name, table in tables_by_type[type].items():
            table_str = table.as_txt(header=header, sep=sep, sort=sort)
            chunks.append("{pre}{name}\n{table}\n"
                          .format(pre=pre_name, name=table_name,
                                  table=table_str))

        return "".join(chunks)

    def merge(self, tr2, auto_row_header=False, copy=False):
        """
        Merges another table result object into the calling table results
        instance.

        Table objects with identical names will be merged, as will headers
        within merged tables.

        tr2:             The table results object to merge into this one.
        auto_row_header: If False, a row header found in both tables raises
                         DuplicateKeyError. If True, the colliding row from
                         the other table is stored under an unused integer
                         row header instead.
        copy:            Not implemented. Passing True raises
                         NotImplementedError.

        IMPORTANT: Currently ONLY merges the tables stored in 'self.long'
        IMPORTANT: A table that only exists in the other table results
                   object is not copied, it is referenced. Thus, a change
                   in the table that gets merged will also occur in the
                   merged table.
        TODO: Implement the copy argument to create a deepcopy of the
              merged table instead of just referencing it.
        """
        if copy is True:
            raise NotImplementedError()

        for table_name in tr2.long:
            other_tbl = tr2.long[table_name]

            # Unknown table: reference the other table directly.
            if table_name not in self.long:
                self.long[table_name] = other_tbl
                continue

            call_tbl = self.long[table_name]

            for row_header in other_tbl:
                # Get row now as row header might change if a row header
                # collision occurs.
                other_row_dict = other_tbl[row_header]

                # Handle row header collisions
                if row_header in call_tbl:
                    if auto_row_header is False:
                        raise DuplicateKeyError(
                            "During merge of two table, row headers were "
                            "found not to be unique across the two tables. "
                            "Row headers needs to be renamed or the merge "
                            "method should be called with "
                            "auto_row_header=True. Note auto row headers will "
                            "become incrementing integers.\n"
                            "Calling table: {}\n"
                            "Other table: {}\n"
                            "Row header: {}\n"
                            .format(call_tbl.name, other_tbl.name, row_header))
                    # Advance the counter until it points at a row header
                    # that is not in use, otherwise the incoming row would
                    # be merged into an unrelated existing row.
                    call_tbl._row_header_count += 1
                    while call_tbl._row_header_count in call_tbl:
                        call_tbl._row_header_count += 1
                    row_header = call_tbl._row_header_count

                call_tbl[row_header] = other_row_dict
127
128
class Row(dict):
    """
    A single row of a Table. Maps column headers to cell values.

    A Row keeps a reference to the table it belongs to, so that a column
    header which is new to the table gets registered in the table when a
    value is stored under it.
    """
    def __init__(self, table):
        self.table = table
        super(Row, self).__init__()

    def __setitem__(self, col_header, col_val):
        """
        Store col_val under col_header. If the header is not known to the
        table, it is added to the table first.

        Raises LockedObjectError if the header is unknown to the table and
        the headers of the table have been locked.
        """
        if col_header not in self.table._headers:
            if self.table.lock_headers:
                # A Row has no name and does not know its own row header,
                # so the table name and the offending column header are
                # reported.
                raise LockedObjectError("Attempt to add a header via a Row "
                                        "insertion to a table, where the "
                                        "headers has been locked.\n"
                                        "Table: {}\n"
                                        "Row header, val: {}, {}\n"
                                        "Header: {}\n"
                                        .format(self.table.name, col_header,
                                                col_val, col_header))
            self.table.add_header(col_header)

        super(Row, self).__setitem__(col_header, col_val)
150
151
class Table(dict):
    """
    Keys of a Table object is considered 'row headers'.
    The Key can be None in which case the row header will become an
    increasing integer. The counter starts at 0 and is incremented after
    each automatically assigned row header.

    Values are assigned as either a dict mapping column headers to values,
    or as a list/tuple of length 2, where the first entry is the name of
    a column header and the second entry is the value. The values are
    stored in a Row object.

    Iterating a Table yields the row headers in the order defined by the
    SortList of the table. The Table object is its own iterator (__iter__
    returns self and resets a shared counter), so two loops over the same
    table cannot be active at the same time.
    """
    NULL_VAL = "NA"

    def __init__(self, name):
        super(Table, self).__init__()

        self.name = name
        self.lock_headers = False

        self._headers = OrderedSet()
        self._row_header_count = 0
        self._sort_key = "row_header"
        self._sort_list = SortList(container=self)
        self._iter_count = 0

    def add_header(self, header, exist_ok=True):
        """
        Add a column header to the table.

        Raises LockedObjectError if the headers have been locked, and
        DuplicateKeyError if the header exists and exist_ok is False.
        """
        if self.lock_headers:
            raise LockedObjectError("Attempt to add a header to an object "
                                    "where the headers has been locked.\n"
                                    "Table: {}\n"
                                    "Header: {}\n".format(self.name, header))

        if exist_ok is False and header in self._headers:
            raise DuplicateKeyError("Attempt to add header that already exists"
                                    " to table, but the add_header method was "
                                    "called with exist_ok=False.\n"
                                    "Table: {}\n"
                                    "Header: {}\n".format(self.name, header))
        self._headers.add(header)

    def add_headers(self, headers, exist_ok=True):
        """
        Add several column headers, one at a time, using add_header.
        """
        for header in headers:
            self.add_header(header, exist_ok)

    def as_txt(self, header=True, sep="\t", sort=True):
        """
        Return the table as text, one line per row.

        header: If True the first line holds the column headers, preceded
                by an empty cell for the row header column.
        sep:    Separator written between cells.
        sort:   If True the sort values are refreshed from the current
                sort key and the rows are sorted before writing.
        """
        if sort:
            self.set_sort_key(self._sort_key)

        lines = []
        if header:
            heads = sep.join(str(head) for head in self._headers)
            lines.append("{sep}{heads}\n".format(sep=sep, heads=heads))

        for row in self:
            row_list = self.get_row_as_list(row, as_txt=True)
            lines.append("{}\n".format(sep.join(row_list)))

        return "".join(lines)

    def extract_column(self, col):
        """
        Return the values of a column as a list. Rows without a value in
        the column contribute Table.NULL_VAL.

        Raises KeyError if col is neither 'row_header' nor a known header.
        """
        if col == "row_header":
            # NOTE(review): row headers are returned in dict insertion
            # order, whereas all other columns follow the sort list order.
            # Confirm with callers whether that difference is intended.
            return list(self.keys())
        if col not in self._headers:
            raise KeyError("Attempt to extract a column that doesn't exist.\n"
                           "Missing column name: {}\n"
                           "Table name: {}".format(col, self.name))
        return [self[entry._key].get(col, Table.NULL_VAL)
                for entry in self._sort_list]

    def get_headers(self):
        """ Returns an OrderedSet """
        return self._headers

    def get_row_as_list(self, row_header, as_txt=False):
        """
        Return a row as a list. The first entry is the row header, followed
        by one value per column header. Missing values become
        Table.NULL_VAL. If as_txt is True every entry is converted to str.

        Raises KeyError if the row header is not in the table.
        """
        if row_header not in self:
            raise KeyError("Unable to find the row_header: {}\n"
                           "In Table object named: {}"
                           .format(row_header, self.name))

        row = self[row_header]
        row_list = [row_header]
        row_list.extend(row.get(col_header, Table.NULL_VAL)
                        for col_header in self._headers)

        if as_txt:
            return [str(cell) for cell in row_list]
        return row_list

    def get_sort_list(self):
        """
        Return the SortList of the table.

        Raises TypeError if the sort key is None.
        """
        if self._sort_key is None:
            raise TypeError("The sort key is None. The sort key needs to be "
                            "set before calling the get_sort_list method.\n"
                            "Table: {}".format(self.name))
        return self._sort_list

    def rename_row(self, old_name, new_name):
        """
        Move the row stored under old_name to new_name. The row is removed
        from the table and the sort list, and then assigned again under
        the new row header.
        """
        row_dict = self[old_name]
        del self[old_name]
        self._sort_list.remove(old_name)
        self[new_name] = row_dict

    def set_sort_key(self, key, sort=True):
        """
        Set the column that rows are sorted on and refresh the sort value
        of every entry in the sort list.

        key:  A column header, or 'row_header' to sort on the row headers.
        sort: If True the sort list is sorted after the values have been
              refreshed.

        Raises KeyError if key is neither a header nor 'row_header'.
        """
        if key not in self._headers and key != "row_header":
            raise KeyError("Attempt to set sort_key to a value that is not a "
                           "header.\n"
                           "Key: {}\n"
                           "Available headers: {}\n"
                           "Table name: {}"
                           .format(key, self._headers, self.name))

        self._sort_key = key

        for entry in self._sort_list:
            if key == "row_header":
                entry._val = entry._key
            else:
                entry._val = self[entry._key].get(key, Table.NULL_VAL)

        if sort is True:
            self._sort_list.sort()

    def set_sort_list(self, list):
        """
        Replace the sort list of the table.

        Raises TypeError if the sort key is None.
        """
        if self._sort_key is None:
            raise TypeError("The sort key is None. The sort key needs to be "
                            "set before calling the set_sort_list method.\n"
                            "Table: {}".format(self.name))
        self._sort_list = list

    def __iter__(self):
        """
        Restart iteration over the row headers and return the table itself
        as the iterator.
        """
        self._iter_count = 0
        return self

    def __next__(self):
        """
        Return the next row header in sort list order. Iteration stops
        after as many row headers as there are rows in the table.
        """
        if self._iter_count >= len(self):
            raise StopIteration
        entry = self._sort_list[self._iter_count]
        self._iter_count += 1
        return entry._key

    def __setitem__(self, row_header=None, col_val=None):
        """
        Add values to the row named row_header. The row is created if it
        does not exist, otherwise the values are added to the existing row.

        row_header: Name of the row. If None the current value of the
                    automatic row header counter is used, and the counter
                    is incremented.
        col_val:    Either a dict mapping column headers to values, or a
                    list/tuple holding exactly one column header and its
                    value.

        Raises TypeError if col_val is None or of an unexpected type, and
        ValueError if a list/tuple does not hold exactly 2 entries.
        """
        if col_val is None:
            raise TypeError("Column key/value argument must be a dict, list or"
                            " tuple. Value was None.\n"
                            "Table: {}".format(self.name))

        # Validate before anything is changed, so that a rejected value
        # neither consumes an automatic row header nor leaves an entry in
        # the sort list without a matching row.
        if isinstance(col_val, dict):
            # Several values must be added as a dict.
            cells = list(col_val.items())
        elif isinstance(col_val, (list, tuple)):
            # A single cell value in a table can be added using a list or
            # tuple.
            if len(col_val) != 2:
                raise ValueError("Unexpected number of values. Expected 2. "
                                 "Got: {}\n"
                                 "Input: {}\n"
                                 "Table: {}".format(len(col_val), col_val,
                                                    self.name))
            cells = [(col_val[0], col_val[1])]
        else:
            raise TypeError("Unexpected type. The value must be of the type "
                            "Row, list, or tuple.\n"
                            "Recieved type: {}\n"
                            "Table: {}\n".format(type(col_val), self.name))

        if row_header is None:
            row_header = self._row_header_count
            self._row_header_count += 1

        # Membership decides whether the row is new. An existing row that
        # holds no values yet has already been added to the sort list.
        is_new_row = row_header not in self
        row_dict = Row(table=self) if is_new_row else self[row_header]

        sort_val = Table.NULL_VAL
        for col_name, val in cells:
            if col_name not in self._headers:
                self.add_header(col_name)
            if col_name == self._sort_key:
                sort_val = val
            row_dict[col_name] = val

        # A new row needs exactly one entry in the sort list, no matter
        # whether it was given as a dict or as a list/tuple.
        if is_new_row:
            if self._sort_key == "row_header":
                sort_val = row_header
            self._sort_list.append(row_header, sort_val, ignore_len=True)

        return super(Table, self).__setitem__(row_header, row_dict)
375
376
class ColumnHeader():
    """
    A column header together with the position it got when it was created.

    Each new ColumnHeader increments the '_col_header_count' attribute of
    the given table and stores the new count as its index. ColumnHeader
    objects are ordered by that index.
    """
    def __init__(self, table, header):
        """
        table:  Object carrying the '_col_header_count' counter. The
                counter is created (starting from 0) if the object does
                not have one yet.
        header: The column header.
        """
        table._col_header_count = getattr(table, "_col_header_count", 0) + 1
        self._header = header
        self._index = table._col_header_count

    def __cmp__(self, other):
        """
        Return -1, 0 or 1 if the index of this header is smaller than,
        equal to or greater than the index of the other header.

        Raises TypeError if other is not a ColumnHeader.
        """
        if not isinstance(other, ColumnHeader):
            raise TypeError("A ColumnHeader object cannot be compared to a "
                            "non-ColumnHeader object.\n"
                            "ColumnHeader _header: {} _index: {}\n"
                            "Other object was of type: {}"
                            .format(self._header, self._index, type(other)))
        return (self._index > other._index) - (self._index < other._index)

    def __lt__(self, other):
        """
        Python 3 does not call __cmp__. Sorting only needs __lt__, which is
        answered by __cmp__ so both agree on the order.
        """
        return self.__cmp__(other) < 0
393
394
class SortList(list):
    """
    Used together with the Table class, as a way to extract rows/data in a
    specific order defined by objects of the type SortList.

    Programmers are meant to sort this list in the desired order and store
    it in a Table class object which will then output rows/data in the
    order defined by the list.

    Implementations details:
    The SortList object contain SortListEntries containing key variables
    with unique values which define the order. And value variables which
    is the 'public' values used to sort on.
    """
    def __init__(self, container=None, unique_list=None, val_list=None):
        """
        container:   Optional object the list belongs to. When given, its
                     length is used by append to guard the list length.
        unique_list: Optional keys of the initial entries.
        val_list:    Optional sort values of the initial entries. Must be
                     of the same length as unique_list.

        Raises IndexError if the two lists differ in length.
        """
        super(SortList, self).__init__()
        self.container = container

        if unique_list is None and val_list is None:
            return
        if len(unique_list) != len(val_list):
            raise IndexError("A SortList object was initiated with two lists "
                             "of different sizes.\n"
                             "len(unique_list) {} != {} len(val_list)"
                             .format(len(unique_list), len(val_list)))

        for key, val in zip(unique_list, val_list):
            self.append(key, val)

    def append(self, key, val, ignore_len=False):
        """
        Append a SortListEntry made from key and val.

        Unless ignore_len is True, a list with a container only accepts the
        entry if the list afterwards has the same length as the container.
        Otherwise IndexError is raised.
        """
        entry = SortListEntry(key, val)
        if self.container is not None and ignore_len is False:
            if len(self.container) != (len(self) + 1):
                raise IndexError("Attempt to add an entry to SortList which "
                                 "would cause the list to be longer than the "
                                 "affiliated SortList container.")
        return super(SortList, self).append(entry)

    def remove(self, id):
        """
        Remove every entry whose key equals id. Nothing happens if no entry
        has that key.
        """
        # Build the list of entries to keep and store it in place. Deleting
        # entries while looping over the list skips the entry that follows
        # each deleted one.
        self[:] = [entry for entry in self if not (id == entry._key)]

    def __repr__(self):
        """
        One line per entry, holding the key and the value separated by a
        tab.
        """
        return "\n".join("{}\t{}".format(entry._key, entry._val)
                         for entry in self)
446
447
class SortListEntry():
    """
    Entry of a SortList. Holds a key and the value used to sort on.

    Entries are compared on the lower case text representation of their
    values, meaning that the order is alphabetical and ignores case, also
    for values that are numbers.
    """
    def __init__(self, key, val):
        self._key = key
        self._val = val

    def _sort_token(self):
        """ Returns the text that all comparisons are made on. """
        return str(self._val).lower()

    def __cmp__(self, other):
        """
        Return -1, 0 or 1 if this entry sorts before, equal to or after the
        other entry.

        Raises TypeError if other is not a SortListEntry.
        """
        if not isinstance(other, SortListEntry):
            raise TypeError("A SortListEntry object cannot be compared to a "
                            "non-SortListEntry object.\n"
                            "SortListEntry key: {} val: {}\n"
                            "Other object was of type: {}"
                            .format(self._key, self._val, type(other)))

        # str has no __cmp__ method in Python 3, the result is derived from
        # the ordering operators instead.
        mine = self._sort_token()
        theirs = other._sort_token()
        return (mine > theirs) - (mine < theirs)

    def __repr__(self):
        return "{},{}".format(self._key, self._val)

    # The comparison methods below return NotImplemented for objects that
    # are not SortListEntry objects, which leaves the decision to Python.

    def __lt__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() < other._sort_token()

    def __le__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() <= other._sort_token()

    def __gt__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() > other._sort_token()

    def __ge__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() >= other._sort_token()

    def __eq__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() == other._sort_token()

    def __ne__(self, other):
        if not isinstance(other, SortListEntry):
            return NotImplemented
        return self._sort_token() != other._sort_token()