comparison env/lib/python3.9/site-packages/boltons/tableutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """If there is one recurring theme in ``boltons``, it is that Python
3 has excellent datastructures that constitute a good foundation for
4 most quick manipulations, as well as building applications. However,
5 Python usage has grown much faster than builtin data structure
6 power. Python has a growing need for more advanced general-purpose
7 data structures which behave intuitively.
8
9 The :class:`Table` class is one example. When handed one- or
10 two-dimensional data, it can provide useful, if basic, text and HTML
11 renditions of small to medium sized data. It also heuristically
12 handles recursive data of various formats (lists, dicts, namedtuples,
13 objects).
14
15 For more advanced :class:`Table`-style manipulation check out the
16 `pandas`_ DataFrame.
17
18 .. _pandas: http://pandas.pydata.org/
19
20 """
21
22 from __future__ import print_function
23
24 try:
25 from html import escape as html_escape
26 except ImportError:
27 from cgi import escape as html_escape
28 import types
29 from itertools import islice
30 try:
31 from collections.abc import Sequence, Mapping, MutableSequence
32 except ImportError:
33 from collections import Sequence, Mapping, MutableSequence
34 try:
35 string_types, integer_types = (str, unicode), (int, long)
36 from cgi import escape as html_escape
37 except NameError:
38 # Python 3 compat
39 unicode = str
40 string_types, integer_types = (str, bytes), (int,)
41 from html import escape as html_escape
42
43 try:
44 from typeutils import make_sentinel
45 _MISSING = make_sentinel(var_name='_MISSING')
46 except ImportError:
47 _MISSING = object()
48
49 """
50 Some idle feature thoughts:
51
52 * shift around column order without rearranging data
53 * gotta make it so you can add additional items, not just initialize with
54 * maybe a shortcut would be to allow adding of Tables to other Tables
55 * what's the perf of preallocating lists and overwriting items versus
56 starting from empty?
57 * is it possible to effectively tell the difference between when a
58 Table is from_data()'d with a single row (list) or with a list of lists?
59 * CSS: white-space pre-line or pre-wrap maybe?
60 * Would be nice to support different backends (currently uses lists
61 exclusively). Sometimes large datasets come in list-of-dicts and
62 list-of-tuples format and it's desirable to cut down processing overhead.
63
64 TODO: make iterable on rows?
65 """
66
67 __all__ = ['Table']
68
69
def to_text(obj, maxlen=None):
    """Return a text (unicode) rendition of *obj*, optionally truncated.

    Conversion is attempted with ``unicode(obj)`` first, then with the
    object's ``repr()``, and finally with ``object.__repr__`` so that
    objects with broken ``__str__``/``__repr__`` still render.

    Args:
        obj (object): Any object.
        maxlen (int): Maximum length of the returned text. Falsy or
            negative values disable truncation. Text longer than
            *maxlen* is cut and suffixed with ``'...'``; when *maxlen*
            is below 3 there is no room for the ellipsis, so the text
            is simply cut to *maxlen* characters.

    Returns:
        The (possibly truncated) text.
    """
    try:
        text = unicode(obj)
    except Exception:
        try:
            text = unicode(repr(obj))
        except Exception:
            text = unicode(object.__repr__(obj))
    if maxlen and maxlen > 0 and len(text) > maxlen:
        if maxlen >= 3:
            text = text[:maxlen - 3] + '...'
        else:
            # a negative slice bound here would keep almost all of the
            # text and still append the ellipsis, exceeding maxlen
            text = text[:maxlen]
        # TODO: inverse of ljust/rjust/center
    return text
82
83
def escape_html(obj, maxlen=None):
    """Render *obj* as text (see :func:`to_text`) and HTML-escape it.

    Truncation to *maxlen* happens before escaping, and quote
    characters are escaped as well (``quote=True``).
    """
    return html_escape(to_text(obj, maxlen=maxlen), quote=True)
87
88
# Types treated as scalar cell values: from_data() wraps a bare value of
# one of these types in a single-cell table and never descends into
# cells of these types (presumably "DNR" = "do not recurse").
_DNR = set(
    string_types
    + integer_types
    + (type(None), bool, complex, float,
       type(NotImplemented), slice,
       types.FunctionType, types.MethodType,
       types.BuiltinFunctionType, types.GeneratorType)
)
93
94
class UnsupportedData(TypeError):
    """Raised when none of the known input types can interpret the data."""
97
98
class InputType(object):
    """Base class for the adapters that turn input data into table rows.

    Subclasses provide ``check_type``, ``guess_headers`` and
    ``get_entry``; this base only supplies the sequence helper below.
    """

    def __init__(self, *a, **kw):
        # any constructor arguments are accepted and ignored
        pass

    def get_entry_seq(self, data_seq, headers):
        """Return one row per item of *data_seq*, built via ``get_entry``."""
        rows = []
        for item in data_seq:
            rows.append(self.get_entry(item, headers))
        return rows
105
106
class DictInputType(InputType):
    """Input adapter for :class:`Mapping` objects (e.g. dicts)."""

    def check_type(self, obj):
        """Return True if *obj* is a Mapping."""
        return isinstance(obj, Mapping)

    def guess_headers(self, obj):
        """Return the mapping's keys as headers, sorted when possible.

        On Python 3, keys of mixed, unorderable types (e.g. ``1`` and
        ``'a'``) make ``sorted()`` raise TypeError; in that case the
        keys are returned in the mapping's own iteration order.
        """
        keys = list(obj.keys())
        try:
            return sorted(keys)
        except TypeError:
            return keys

    def get_entry(self, obj, headers):
        """Return the row for one mapping; missing keys become None."""
        return [obj.get(h) for h in headers]

    def get_entry_seq(self, obj, headers):
        """Return one row per mapping in the sequence *obj*."""
        return [[ci.get(h) for h in headers] for ci in obj]
119
120
class ObjectInputType(InputType):
    """Input adapter that reads the attributes of arbitrary objects."""

    def check_type(self, obj):
        """Accept anything that is not one of the scalar ``_DNR`` types."""
        if type(obj) in _DNR:
            return False
        return hasattr(obj, '__class__')

    def guess_headers(self, obj):
        """Return the names from ``dir(obj)`` whose values are readable
        and not callable.
        """
        names = []
        for name in dir(obj):
            # an object's __dict__ could technically have non-string keys
            try:
                value = getattr(obj, name)
            except Exception:
                # seen on greenlet: `run` shows in dir() but raises
                # AttributeError. Also properties misbehave.
                continue
            if not callable(value):
                names.append(name)
        return names

    def get_entry(self, obj, headers):
        """Return the row for *obj*: one attribute value per header,
        with None wherever the attribute cannot be read.
        """
        row = []
        for name in headers:
            try:
                cell = getattr(obj, name)
            except Exception:
                cell = None
            row.append(cell)
        return row
148
149
# might be better to hardcode list support since it's so close to the
# core or might be better to make this the copy-style from_* importer
# and have the non-copy style be hardcoded in __init__
class ListInputType(InputType):
    """Input adapter for mutable sequences (lists); rows are passed
    through as-is, without copying.
    """

    def check_type(self, obj):
        """Return True if *obj* is a MutableSequence."""
        return isinstance(obj, MutableSequence)

    def guess_headers(self, obj):
        """Lists carry no header information; always returns None."""
        return None

    def get_entry(self, obj, headers):
        """Return *obj* itself as the row (*headers* is unused)."""
        return obj

    def get_entry_seq(self, obj_seq, headers):
        """Return *obj_seq* itself as the rows (*headers* is unused)."""
        return obj_seq
165
166
class TupleInputType(InputType):
    """Input adapter for tuples; each tuple is copied into a new list."""

    def check_type(self, obj):
        """Return True if *obj* is a tuple."""
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Plain tuples carry no header information; always returns None."""
        return None

    def get_entry(self, obj, headers):
        """Return the tuple's items as a new list (*headers* is unused)."""
        row = []
        row.extend(obj)
        return row

    def get_entry_seq(self, obj_seq, headers):
        """Return a new list of lists, one per tuple in *obj_seq*."""
        rows = []
        for item in obj_seq:
            rows.append(list(item))
        return rows
179
180
class NamedTupleInputType(InputType):
    """Input adapter for namedtuple-style objects: tuples that expose a
    ``_fields`` attribute.
    """

    def check_type(self, obj):
        """Return True if *obj* has ``_fields`` and is a tuple."""
        if not hasattr(obj, '_fields'):
            return False
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Return the field names as a new list."""
        return list(obj._fields)

    def get_entry(self, obj, headers):
        """Return one value per header; absent attributes become None."""
        return [getattr(obj, name, None) for name in headers]

    def get_entry_seq(self, obj_seq, headers):
        """Return one row per object in *obj_seq* (see ``get_entry``)."""
        rows = []
        for item in obj_seq:
            rows.append([getattr(item, name, None) for name in headers])
        return rows
193
194
class Table(object):
    """
    This Table class is meant to be simple, low-overhead, and extensible. Its
    most common use would be for translation between in-memory data
    structures and serialization formats, such as HTML and console-ready text.

    As such, it stores data in list-of-lists format, and *does not* copy
    lists passed in. It also reserves the right to modify those lists in a
    "filling" process, whereby short lists are extended to the width of
    the table (usually determined by number of headers). This greatly
    reduces overhead and processing/validation that would have to occur
    otherwise.

    General description of headers behavior:

    Headers describe the columns, but are not part of the data, however,
    if the *headers* argument is omitted, Table tries to infer header
    names from the data. It is possible to have a table with no headers,
    just pass in ``headers=None``.

    Supported inputs:

    * :class:`list` of :class:`list` objects
    * :class:`dict` (list/single)
    * :class:`object` (list/single)
    * :class:`collections.namedtuple` (list/single)
    * TODO: DB API cursor?
    * TODO: json

    Supported outputs:

    * HTML
    * Pretty text (also usable as GF Markdown)
    * TODO: CSV
    * TODO: json
    * TODO: json lines

    To minimize resident size, the Table data is stored as a list of lists.
    """

    # order definitely matters here
    _input_types = [DictInputType(), ListInputType(),
                    NamedTupleInputType(), TupleInputType(),
                    ObjectInputType()]

    _html_tr, _html_tr_close = '<tr>', '</tr>'
    _html_th, _html_th_close = '<th>', '</th>'
    _html_td, _html_td_close = '<td>', '</td>'
    _html_thead, _html_thead_close = '<thead>', '</thead>'
    _html_tbody, _html_tbody_close = '<tbody>', '</tbody>'

    # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
    _html_table_tag, _html_table_tag_close = '<table>', '</table>'

    def __init__(self, data=None, headers=_MISSING, metadata=None):
        """Create a Table from rows of data.

        Args:
            data: Rows to store. When *headers* is omitted the first
                row is taken as the headers and must be reachable via
                ``data[0]``.
            headers: Column headers. Omit to take them from the first
                row of *data*; pass ``None`` for a header-less table.
            metadata (dict): Optional metadata, rendered by
                :meth:`to_html` on request.
        """
        if headers is _MISSING:
            headers = []
            if data:
                # no headers given: the first row doubles as the headers
                headers, data = list(data[0]), islice(data, 1, None)
        self.headers = headers or []
        self.metadata = metadata or {}
        self._data = []
        self._width = 0

        self.extend(data)

    def extend(self, data):
        """
        Append the given data to the end of the Table.
        """
        if not data:
            return
        self._data.extend(data)
        self._set_width()
        self._fill()

    def _set_width(self, reset=False):
        """Determine the table width once: the number of headers if
        there are any, otherwise the length of the longest row. An
        already-set width is kept unless *reset* is true.
        """
        if reset:
            self._width = 0
        if self._width:
            return
        if self.headers:
            self._width = len(self.headers)
            return
        # `or [0]` keeps max() from raising ValueError when there are
        # neither headers nor rows, e.g. Table([[]])
        self._width = max([len(d) for d in self._data] or [0])

    def _fill(self):
        """Pad rows shorter than the table width with None, in place."""
        width, filler = self._width, [None]
        if not width:
            return
        for d in self._data:
            rem = width - len(d)
            if rem > 0:
                d.extend(filler * rem)
        return

    @classmethod
    def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`dict`. Operates the same as
        :meth:`from_data`, but forces interpretation of the data as a
        Mapping.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=DictInputType(),
                             metadata=metadata)

    @classmethod
    def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`list`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as a Sequence.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ListInputType(),
                             metadata=metadata)

    @classmethod
    def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from an :class:`object`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as an object. May be useful for some :class:`dict` and
        :class:`list` subtypes.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ObjectInputType(),
                             metadata=metadata)

    @classmethod
    def _guess_input_type(cls, obj):
        """Return the first entry of ``_input_types`` whose ``check_type``
        accepts *obj*, or None if none does.
        """
        for input_type in cls._input_types:
            if input_type.check_type(obj):
                return input_type
        return None

    @classmethod
    def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
        """Create a Table from any supported data, heuristically
        selecting how to represent the data in Table format.

        Args:
            data (object): Any object or iterable with data to be
                imported to the Table.

            headers (iterable): An iterable of headers to be matched
                to the data. If not explicitly passed, headers will be
                guessed for certain datatypes.

            max_depth (int): The level to which nested Tables should
                be created (default: 1).

            _data_type (InputType subclass): For advanced use cases,
                do not guess the type of the input data, use this data
                type instead.

        Raises:
            UnsupportedData: if no input type can interpret *data*.
        """
        # TODO: seen/cycle detection/reuse ?
        # maxdepth follows the same behavior as find command
        # i.e., it doesn't work if max_depth=0 is passed in
        metadata = kwargs.pop('metadata', None)
        _data_type = kwargs.pop('_data_type', None)

        if max_depth < 1:
            # return data instead?
            return cls(headers=headers, metadata=metadata)
        is_seq = isinstance(data, Sequence)
        to_check = data
        if is_seq:
            if not data:
                return cls(headers=headers, metadata=metadata)
            to_check = data[0]
            if not _data_type:
                _data_type = cls._guess_input_type(to_check)
                if _data_type is None:
                    # not particularly happy about this rewind-y approach:
                    # the items are unsupported, so treat the sequence
                    # itself as a single entry
                    is_seq, to_check = False, data
        elif type(data) in _DNR:
            # hmm, got scalar data.
            # raise an exception or make an exception, nahmsayn?
            # With headers left as _MISSING, __init__ would consume the
            # only row as the headers; pass None so the value stays data.
            scalar_headers = None if headers is _MISSING else headers
            return cls([[data]], headers=scalar_headers, metadata=metadata)
        if not _data_type:
            _data_type = cls._guess_input_type(to_check)
            if _data_type is None:
                raise UnsupportedData('unsupported data type %r'
                                      % type(data))
        if headers is _MISSING:
            headers = _data_type.guess_headers(to_check)
        if is_seq:
            entries = _data_type.get_entry_seq(data, headers)
        else:
            entries = [_data_type.get_entry(data, headers)]
        if max_depth > 1:
            new_max_depth = max_depth - 1
            for i, entry in enumerate(entries):
                for j, cell in enumerate(entry):
                    if type(cell) in _DNR:
                        # optimization to avoid function overhead
                        continue
                    try:
                        entries[i][j] = cls.from_data(cell,
                                                      max_depth=new_max_depth)
                    except UnsupportedData:
                        continue
        return cls(entries, headers=headers, metadata=metadata)

    def __len__(self):
        """Return the number of data rows (headers are not counted)."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return the row (or slice of rows) at *idx*."""
        return self._data[idx]

    def __repr__(self):
        cn = self.__class__.__name__
        if self.headers:
            return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
        else:
            return '%s(%r)' % (cn, self._data)

    def to_html(self, orientation=None, wrapped=True,
                with_headers=True, with_newlines=True,
                with_metadata=False, max_depth=1):
        """Render this Table to HTML. Configure the structure of Table
        HTML by subclassing and overriding ``_html_*`` class
        attributes.

        Args:
            orientation (str): one of 'auto', 'horizontal', or
                'vertical' (or the first letter of any of
                those). Default 'auto'.
            wrapped (bool): whether or not to include the wrapping
                '<table></table>' tags. Default ``True``, set to
                ``False`` if appending multiple Table outputs or an
                otherwise customized HTML wrapping tag is needed.
            with_headers (bool): whether or not to render the headers,
                if the Table has any. Default ``True``.
            with_newlines (bool): Set to ``True`` if output should
                include added newlines to make the HTML more
                readable. Default ``True``.
            with_metadata (bool/str): Set to ``True`` if output should
                be preceded with a Table of preset metadata, if it
                exists. Set to special value ``'bottom'`` if the
                metadata Table HTML should come *after* the main HTML output.
            max_depth (int): Indicate how deeply to nest HTML tables
                before simply reverting to :func:`repr`-ing the nested
                data.

        Returns:
            A text string of the HTML of the rendered table.

        Raises:
            ValueError: if *orientation* is not recognized.

        """
        lines = []
        headers = []
        if with_metadata and self.metadata:
            metadata_table = Table.from_data(self.metadata,
                                             max_depth=max_depth)
            metadata_html = metadata_table.to_html(with_headers=True,
                                                   with_newlines=with_newlines,
                                                   with_metadata=False,
                                                   max_depth=max_depth)
            if with_metadata != 'bottom':
                lines.append(metadata_html)
                lines.append('<br />')

        if with_headers and self.headers:
            headers.extend(self.headers)
            headers.extend([None] * (self._width - len(self.headers)))
        if wrapped:
            lines.append(self._html_table_tag)
        orientation = orientation or 'auto'
        ol = orientation[0].lower()
        if ol == 'a':
            ol = 'h' if len(self) > 1 else 'v'
        if ol == 'h':
            self._add_horizontal_html_lines(lines, headers=headers,
                                            max_depth=max_depth)
        elif ol == 'v':
            self._add_vertical_html_lines(lines, headers=headers,
                                          max_depth=max_depth)
        else:
            raise ValueError("expected one of 'auto', 'vertical', or"
                             " 'horizontal', not %r" % orientation)
        # close the main table *before* emitting bottom metadata, so the
        # metadata table is not nested inside the main <table> element
        if wrapped:
            lines.append(self._html_table_tag_close)
        if with_metadata == 'bottom' and self.metadata:
            lines.append('<br />')
            lines.append(metadata_html)

        sep = '\n' if with_newlines else ''
        return sep.join(lines)

    def get_cell_html(self, value):
        """Called on each value in an HTML table. By default it simply escapes
        the HTML. Override this method to add additional conditions
        and behaviors, but take care to ensure the final output is
        HTML escaped.
        """
        return escape_html(value)

    def _add_horizontal_html_lines(self, lines, headers, max_depth):
        """Append the HTML for a one-row-per-entry layout to *lines*:
        an optional ``<thead>`` with *headers*, then a ``<tbody>`` with
        one ``<tr>`` per data row. Cells that are Tables are rendered
        as nested tables while *max_depth* is greater than 1.
        """
        esc = self.get_cell_html
        new_depth = max_depth - 1 if max_depth > 1 else max_depth
        if headers:
            _thth = self._html_th_close + self._html_th
            lines.append(self._html_thead)
            lines.append(self._html_tr + self._html_th +
                         _thth.join([esc(h) for h in headers]) +
                         self._html_th_close + self._html_tr_close)
            lines.append(self._html_thead_close)
        trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
                               self._html_td_close + self._html_td,
                               self._html_td_close + self._html_tr_close)
        lines.append(self._html_tbody)
        for row in self._data:
            if max_depth > 1:
                _fill_parts = []
                for cell in row:
                    if isinstance(cell, Table):
                        _fill_parts.append(cell.to_html(max_depth=new_depth))
                    else:
                        _fill_parts.append(esc(cell))
            else:
                _fill_parts = [esc(c) for c in row]
            lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))
        lines.append(self._html_tbody_close)

    def _add_vertical_html_lines(self, lines, headers, max_depth):
        """Append the HTML for a one-row-per-column layout to *lines*:
        each ``<tr>`` holds an optional ``<th>`` header followed by that
        column's value from every data row. Cells that are Tables are
        rendered as nested tables while *max_depth* is greater than 1.
        """
        esc = self.get_cell_html
        new_depth = max_depth - 1 if max_depth > 1 else max_depth
        tr, th, _th = self._html_tr, self._html_th, self._html_th_close
        td, _tdtd = self._html_td, self._html_td_close + self._html_td
        _td_tr = self._html_td_close + self._html_tr_close
        for i in range(self._width):
            line_parts = [tr]
            if headers:
                line_parts.extend([th, esc(headers[i]), _th])
            if max_depth > 1:
                _fill_parts = []
                for row in self._data:
                    cell = row[i]
                    if isinstance(cell, Table):
                        _fill_parts.append(cell.to_html(max_depth=new_depth))
                    else:
                        _fill_parts.append(esc(row[i]))
            else:
                _fill_parts = [esc(row[i]) for row in self._data]
            line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
            lines.append(''.join(line_parts))

    def to_text(self, with_headers=True, maxlen=None):
        """Get the Table's textual representation. Only works well
        for Tables with non-recursive data.

        Args:
            with_headers (bool): Whether to include a header row at the
                top. Ignored when the Table has no headers.
            maxlen (int): Max length of data in each cell.

        Returns:
            The rendered text, one line per row, with each column
            centered to the width of its widest cell.
        """
        # inside a method body the bare name ``to_text`` is the
        # module-level function, not this method
        text_data = [[to_text(cell, maxlen=maxlen) for cell in row]
                     for row in self._data]
        show_headers = bool(with_headers and self.headers)
        header_cells = []
        if show_headers:
            # headers need not be strings; render them like any cell
            header_cells = [to_text(h, maxlen=maxlen) for h in self.headers]
        measured = text_data + ([header_cells] if show_headers else [])
        n_cols = max([self._width] + [len(row) for row in measured])
        # each column is as wide as its widest cell (or header)
        widths = [0] * n_cols
        for row in measured:
            for idx, cell in enumerate(row):
                if len(cell) > widths[idx]:
                    widths[idx] = len(cell)
        lines = []
        if show_headers:
            header_cells.extend([''] * (n_cols - len(header_cells)))
            lines.append(' | '.join([h.center(widths[i])
                                     for i, h in enumerate(header_cells)]))
            lines.append('-|-'.join(['-' * w for w in widths]))
        for row in text_data:
            lines.append(' | '.join([cell.center(widths[j])
                                     for j, cell in enumerate(row)]))
        return '\n'.join(lines)