comparison: env/lib/python3.9/site-packages/boltons/cacheutils.py @ 0:4f3585e2f14b (draft, default, tip)

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author: shellac
date:   Mon, 22 Mar 2021 18:12:50 +0000
# -*- coding: utf-8 -*-
"""``cacheutils`` contains consistent implementations of fundamental
cache types. Currently there are two to choose from:

  * :class:`LRI` - Least-recently inserted
  * :class:`LRU` - Least-recently used

Both caches are :class:`dict` subtypes, designed to be as
interchangeable as possible, to facilitate experimentation. A key
practice when enhancing performance with caching is ensuring that the
caching strategy is actually working. If the cache is constantly
missing, it is just adding more overhead and code complexity. The
standard statistics are:

  * ``hit_count`` - the number of times the queried key has been in
    the cache
  * ``miss_count`` - the number of times a key has been absent and/or
    fetched by the cache
  * ``soft_miss_count`` - the number of times a key has been absent,
    but a default has been provided by the caller, as with
    :meth:`dict.get` and :meth:`dict.setdefault`. Soft misses are a
    subset of misses, so this number is always less than or equal to
    ``miss_count``.

Additionally, ``cacheutils`` provides :class:`ThresholdCounter`, a
cache-like bounded counter useful for online statistics collection.

Learn more about `caching algorithms on Wikipedia
<https://en.wikipedia.org/wiki/Cache_algorithms#Examples>`_.

"""

# TODO: TimedLRI
# TODO: support 0 max_size?


import heapq
import weakref
import itertools
from operator import attrgetter

try:
    from threading import RLock
except Exception:
    class RLock(object):
        'Dummy reentrant lock for builds without threads'
        def __enter__(self):
            pass

        def __exit__(self, exctype, excinst, exctb):
            pass

try:
    from boltons.typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
    _KWARG_MARK = make_sentinel(var_name='_KWARG_MARK')
except ImportError:
    _MISSING = object()
    _KWARG_MARK = object()

try:
    xrange
except NameError:
    # py3
    xrange = range
    unicode, str, bytes, basestring = str, bytes, bytes, (str, bytes)

PREV, NEXT, KEY, VALUE = range(4)   # names for the link fields
DEFAULT_MAX_SIZE = 128


class LRI(dict):
    """The ``LRI`` implements the basic *Least Recently Inserted* strategy
    for caching. One could also think of this as a ``SizeLimitedDefaultDict``.

    *on_miss* is a callable that accepts the missing key (as opposed
    to :class:`collections.defaultdict`'s "default_factory", which
    accepts no arguments). Also note that, like the :class:`LRU`,
    the ``LRI`` is instrumented with statistics tracking.

    >>> cap_cache = LRI(max_size=2)
    >>> cap_cache['a'], cap_cache['b'] = 'A', 'B'
    >>> from pprint import pprint as pp
    >>> pp(dict(cap_cache))
    {'a': 'A', 'b': 'B'}
    >>> [cap_cache['b'] for i in range(3)][0]
    'B'
    >>> cap_cache['c'] = 'C'
    >>> print(cap_cache.get('a'))
    None
    >>> cap_cache.hit_count, cap_cache.miss_count, cap_cache.soft_miss_count
    (3, 1, 1)
    """
    def __init__(self, max_size=DEFAULT_MAX_SIZE, values=None,
                 on_miss=None):
        if max_size <= 0:
            raise ValueError('expected max_size > 0, not %r' % max_size)
        self.hit_count = self.miss_count = self.soft_miss_count = 0
        self.max_size = max_size
        self._lock = RLock()
        self._init_ll()

        if on_miss is not None and not callable(on_miss):
            raise TypeError('expected on_miss to be a callable'
                            ' (or None), not %r' % on_miss)
        self.on_miss = on_miss

        if values:
            self.update(values)

    # TODO: fromkeys()?

    # linked list manipulation methods.
    #
    # invariants:
    # 1) 'anchor' is the sentinel node in the doubly linked list. there is
    #    always only one, and its KEY and VALUE are both _MISSING.
    # 2) the most recently accessed node comes immediately before 'anchor'.
    # 3) the least recently accessed node comes immediately after 'anchor'.
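    #
    # for example, after inserting 'a' then 'b' (capacity permitting),
    # following NEXT pointers around the ring gives:
    #
    #   anchor -> 'a' -> 'b' -> anchor
    #
    # with 'b' (newest) immediately before anchor and 'a' (oldest)
    # immediately after it.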
    def _init_ll(self):
        anchor = []
        anchor[:] = [anchor, anchor, _MISSING, _MISSING]
        # a link lookup table for finding linked list links in O(1)
        # time.
        self._link_lookup = {}
        self._anchor = anchor

    def _print_ll(self):
        print('***')
        for (key, val) in self._get_flattened_ll():
            print(key, val)
        print('***')
        return

    def _get_flattened_ll(self):
        flattened_list = []
        link = self._anchor
        while True:
            flattened_list.append((link[KEY], link[VALUE]))
            link = link[NEXT]
            if link is self._anchor:
                break
        return flattened_list

    def _get_link_and_move_to_front_of_ll(self, key):
        # find what will become the newest link. this may raise a
        # KeyError, which is useful to __getitem__ and __setitem__
        newest = self._link_lookup[key]

        # splice out what will become the newest link.
        newest[PREV][NEXT] = newest[NEXT]
        newest[NEXT][PREV] = newest[PREV]

        # move what will become the newest link immediately before
        # anchor (invariant 2)
        anchor = self._anchor
        second_newest = anchor[PREV]
        second_newest[NEXT] = anchor[PREV] = newest
        newest[PREV] = second_newest
        newest[NEXT] = anchor
        return newest

    def _set_key_and_add_to_front_of_ll(self, key, value):
        # create a new link and place it immediately before anchor
        # (invariant 2).
        anchor = self._anchor
        second_newest = anchor[PREV]
        newest = [second_newest, anchor, key, value]
        second_newest[NEXT] = anchor[PREV] = newest
        self._link_lookup[key] = newest

    def _set_key_and_evict_last_in_ll(self, key, value):
        # the link after anchor is the oldest in the linked list
        # (invariant 3). the current anchor becomes a link that holds
        # the newest key, and the oldest link becomes the new anchor
        # (invariant 1). now the newest link comes before anchor
        # (invariant 2). no links are moved; only their keys
        # and values are changed.
        oldanchor = self._anchor
        oldanchor[KEY] = key
        oldanchor[VALUE] = value

        self._anchor = anchor = oldanchor[NEXT]
        evicted = anchor[KEY]
        anchor[KEY] = anchor[VALUE] = _MISSING
        del self._link_lookup[evicted]
        self._link_lookup[key] = oldanchor
        return evicted

    def _remove_from_ll(self, key):
        # splice a link out of the list and drop it from our lookup
        # table.
        link = self._link_lookup.pop(key)
        link[PREV][NEXT] = link[NEXT]
        link[NEXT][PREV] = link[PREV]

    def __setitem__(self, key, value):
        with self._lock:
            try:
                link = self._get_link_and_move_to_front_of_ll(key)
            except KeyError:
                if len(self) < self.max_size:
                    self._set_key_and_add_to_front_of_ll(key, value)
                else:
                    evicted = self._set_key_and_evict_last_in_ll(key, value)
                    super(LRI, self).__delitem__(evicted)
                super(LRI, self).__setitem__(key, value)
            else:
                link[VALUE] = value

    def __getitem__(self, key):
        with self._lock:
            try:
                link = self._link_lookup[key]
            except KeyError:
                self.miss_count += 1
                if not self.on_miss:
                    raise
                ret = self[key] = self.on_miss(key)
                return ret

            self.hit_count += 1
            return link[VALUE]

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            self.soft_miss_count += 1
            return default

    def __delitem__(self, key):
        with self._lock:
            super(LRI, self).__delitem__(key)
            self._remove_from_ll(key)

    def pop(self, key, default=_MISSING):
        # NB: hit/miss counts are bypassed for pop()
        with self._lock:
            try:
                ret = super(LRI, self).pop(key)
            except KeyError:
                if default is _MISSING:
                    raise
                ret = default
            else:
                self._remove_from_ll(key)
            return ret

    def popitem(self):
        with self._lock:
            item = super(LRI, self).popitem()
            self._remove_from_ll(item[0])
            return item

    def clear(self):
        with self._lock:
            super(LRI, self).clear()
            self._init_ll()

    def copy(self):
        return self.__class__(max_size=self.max_size, values=self)

    def setdefault(self, key, default=None):
        with self._lock:
            try:
                return self[key]
            except KeyError:
                self.soft_miss_count += 1
                self[key] = default
                return default

    def update(self, E, **F):
        # E and F are throwback names to the dict() __doc__
        with self._lock:
            if E is self:
                return
            setitem = self.__setitem__
            if callable(getattr(E, 'keys', None)):
                for k in E.keys():
                    setitem(k, E[k])
            else:
                for k, v in E:
                    setitem(k, v)
            for k in F:
                setitem(k, F[k])
            return

    def __eq__(self, other):
        with self._lock:
            if self is other:
                return True
            if len(other) != len(self):
                return False
            if not isinstance(other, LRI):
                return other == self
            return super(LRI, self).__eq__(other)

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        cn = self.__class__.__name__
        val_map = super(LRI, self).__repr__()
        return ('%s(max_size=%r, on_miss=%r, values=%s)'
                % (cn, self.max_size, self.on_miss, val_map))

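# A usage sketch for the on_miss hook described above (``load_config`` is
# a hypothetical loader, not part of this module):
#
#   configs = LRI(max_size=32, on_miss=load_config)
#   cfg = configs['/etc/app.conf']  # miss: load_config fills the cache
#   cfg = configs['/etc/app.conf']  # hit: served from the cache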

class LRU(LRI):
    """The ``LRU`` is a :class:`dict` subtype implementation of the
    *Least-Recently Used* caching strategy.

    Args:
        max_size (int): Max number of items to cache. Defaults to ``128``.
        values (iterable): Initial values for the cache. Defaults to ``None``.
        on_miss (callable): a callable which accepts a single argument, the
            key not present in the cache, and returns the value to be cached.

    >>> cap_cache = LRU(max_size=2)
    >>> cap_cache['a'], cap_cache['b'] = 'A', 'B'
    >>> from pprint import pprint as pp
    >>> pp(dict(cap_cache))
    {'a': 'A', 'b': 'B'}
    >>> [cap_cache['b'] for i in range(3)][0]
    'B'
    >>> cap_cache['c'] = 'C'
    >>> print(cap_cache.get('a'))
    None

    This cache is also instrumented with statistics
    collection. ``hit_count``, ``miss_count``, and ``soft_miss_count``
    are all integer members that can be used to introspect the
    performance of the cache. ("Soft" misses are misses that did not
    raise :exc:`KeyError`, e.g., ``LRU.get()`` or ``on_miss`` was used to
    cache a default.)

    >>> cap_cache.hit_count, cap_cache.miss_count, cap_cache.soft_miss_count
    (3, 1, 1)

    Other than the size-limiting caching behavior and statistics,
    ``LRU`` acts like its parent class, the built-in Python :class:`dict`.
    """
    def __getitem__(self, key):
        with self._lock:
            try:
                link = self._get_link_and_move_to_front_of_ll(key)
            except KeyError:
                self.miss_count += 1
                if not self.on_miss:
                    raise
                ret = self[key] = self.on_miss(key)
                return ret

            self.hit_count += 1
            return link[VALUE]

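# The behavioral difference from LRI is eviction order, sketched below
# using only the two classes above:
#
#   lru, lri = LRU(max_size=2), LRI(max_size=2)
#   for c in 'ab':
#       lru[c] = lri[c] = c
#   lru['a'], lri['a']         # reading refreshes 'a' only in the LRU
#   lru['c'] = lri['c'] = 'c'  # both caches must now evict one key
#   'a' in lru, 'a' in lri     # -> (True, False): LRI evicts by insertion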

### Cached decorator
# Key-making technique adapted from Python 3.4's functools

class _HashedKey(list):
    """The _HashedKey guarantees that hash() will be called no more than once
    per cached function invocation.
    """
    __slots__ = 'hash_value'

    def __init__(self, key):
        self[:] = key
        self.hash_value = hash(tuple(key))

    def __hash__(self):
        return self.hash_value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, list.__repr__(self))


def make_cache_key(args, kwargs, typed=False,
                   kwarg_mark=_KWARG_MARK,
                   fasttypes=frozenset([int, str, frozenset, type(None)])):
    """Make a generic key from a function's positional and keyword
    arguments, suitable for use in caches. Arguments within *args* and
    *kwargs* must be `hashable`_. If *typed* is ``True``, ``3`` and
    ``3.0`` will be treated as separate keys.

    The key is constructed in a way that is as flat as possible rather
    than as a nested structure that would take more memory.

    If there is only a single argument and its data type is known to cache
    its hash value, then that argument is returned without a wrapper. This
    saves space and improves lookup speed.

    >>> tuple(make_cache_key(('a', 'b'), {'c': ('d')}))
    ('a', 'b', _KWARG_MARK, ('c', 'd'))

    .. _hashable: https://docs.python.org/2/glossary.html#term-hashable
    """

    # key = [func_name] if func_name else []
    # key.extend(args)
    key = list(args)
    if kwargs:
        sorted_items = sorted(kwargs.items())
        key.append(kwarg_mark)
        key.extend(sorted_items)
    if typed:
        key.extend([type(v) for v in args])
        if kwargs:
            key.extend([type(v) for k, v in sorted_items])
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedKey(key)

# for backwards compatibility in case someone was importing it
_make_cache_key = make_cache_key
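
# Two properties of the generated keys, shown as a small sketch (the
# variable names are illustrative):
#
#   # keyword argument order does not matter, since items are sorted:
#   k1 = make_cache_key((), {'a': 1, 'b': 2})
#   k2 = make_cache_key((), {'b': 2, 'a': 1})
#   assert k1 == k2 and hash(k1) == hash(k2)
#
#   # typed=True folds argument types into the key, so 3 and 3.0 differ:
#   assert make_cache_key((3, 'x'), {}, typed=True) \
#          != make_cache_key((3.0, 'x'), {}, typed=True)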


class CachedFunction(object):
    """This type is used by :func:`cached`, below. Instances of this
    class are used to wrap functions in caching logic.
    """
    def __init__(self, func, cache, scoped=True, typed=False, key=None):
        self.func = func
        if callable(cache):
            self.get_cache = cache
        elif not (callable(getattr(cache, '__getitem__', None))
                  and callable(getattr(cache, '__setitem__', None))):
            raise TypeError('expected cache to be a dict-like object,'
                            ' or callable returning a dict-like object, not %r'
                            % cache)
        else:
            def _get_cache():
                return cache
            self.get_cache = _get_cache
        self.scoped = scoped
        self.typed = typed
        self.key_func = key or make_cache_key

    def __call__(self, *args, **kwargs):
        cache = self.get_cache()
        key = self.key_func(args, kwargs, typed=self.typed)
        try:
            ret = cache[key]
        except KeyError:
            ret = cache[key] = self.func(*args, **kwargs)
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        if self.typed or not self.scoped:
            return ("%s(func=%r, scoped=%r, typed=%r)"
                    % (cn, self.func, self.scoped, self.typed))
        return "%s(func=%r)" % (cn, self.func)


class CachedMethod(object):
    """Similar to :class:`CachedFunction`, this type is used by
    :func:`cachedmethod` to wrap methods in caching logic.
    """
    def __init__(self, func, cache, scoped=True, typed=False, key=None):
        self.func = func
        self.__isabstractmethod__ = getattr(func, '__isabstractmethod__', False)
        if isinstance(cache, basestring):
            self.get_cache = attrgetter(cache)
        elif callable(cache):
            self.get_cache = cache
        elif not (callable(getattr(cache, '__getitem__', None))
                  and callable(getattr(cache, '__setitem__', None))):
            raise TypeError('expected cache to be an attribute name,'
                            ' dict-like object, or callable returning'
                            ' a dict-like object, not %r' % cache)
        else:
            def _get_cache(obj):
                return cache
            self.get_cache = _get_cache
        self.scoped = scoped
        self.typed = typed
        self.key_func = key or make_cache_key
        self.bound_to = None

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        cls = self.__class__
        ret = cls(self.func, self.get_cache, typed=self.typed,
                  scoped=self.scoped, key=self.key_func)
        ret.bound_to = obj
        return ret

    def __call__(self, *args, **kwargs):
        obj = args[0] if self.bound_to is None else self.bound_to
        cache = self.get_cache(obj)
        key_args = (self.bound_to, self.func) + args if self.scoped else args
        key = self.key_func(key_args, kwargs, typed=self.typed)
        try:
            ret = cache[key]
        except KeyError:
            if self.bound_to is not None:
                args = (self.bound_to,) + args
            ret = cache[key] = self.func(*args, **kwargs)
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        args = (cn, self.func, self.scoped, self.typed)
        if self.bound_to is not None:
            args += (self.bound_to,)
            return ('<%s func=%r scoped=%r typed=%r bound_to=%r>' % args)
        return ("%s(func=%r, scoped=%r, typed=%r)" % args)


def cached(cache, scoped=True, typed=False, key=None):
    """Cache any function with the cache object of your choosing. Note
    that the function wrapped should take only `hashable`_ arguments.

    Args:
        cache (Mapping): Any :class:`dict`-like object suitable for
            use as a cache. Instances of the :class:`LRU` and
            :class:`LRI` are good choices, but a plain :class:`dict`
            can work in some cases, as well. This argument can also be
            a callable which accepts no arguments and returns a mapping.
        scoped (bool): Whether the function itself is part of the
            cache key. ``True`` by default, meaning different functions
            will not read one another's cache entries, but can evict
            one another's results. ``False`` can be useful for certain
            shared cache use cases. More advanced behavior can be
            produced through the *key* argument.
        typed (bool): Whether to factor argument types into the cache
            check. Default ``False``, setting to ``True`` causes the
            cache keys for ``3`` and ``3.0`` to be considered unequal.

    >>> my_cache = LRU()
    >>> @cached(my_cache)
    ... def cached_lower(x):
    ...     return x.lower()
    ...
    >>> cached_lower("CaChInG's FuN AgAiN!")
    "caching's fun again!"
    >>> len(my_cache)
    1

    .. _hashable: https://docs.python.org/2/glossary.html#term-hashable

    """
    def cached_func_decorator(func):
        return CachedFunction(func, cache, scoped=scoped, typed=typed, key=key)
    return cached_func_decorator

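# As noted in the docstring, *cache* may also be a zero-argument callable
# returning a mapping, which defers the choice of cache to call time. A
# sketch (``_CACHES`` is a hypothetical registry):
#
#   _CACHES = {'default': LRU(max_size=512)}
#
#   @cached(lambda: _CACHES['default'], typed=True)
#   def tokenize(text):
#       return text.split()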

def cachedmethod(cache, scoped=True, typed=False, key=None):
    """Similar to :func:`cached`, ``cachedmethod`` is used to cache
    methods based on their arguments, using any :class:`dict`-like
    *cache* object.

    Args:
        cache (str/Mapping/callable): Can be the name of an attribute
            on the instance, any Mapping/:class:`dict`-like object, or
            a callable which returns a Mapping.
        scoped (bool): Whether the method itself and the object it is
            bound to are part of the cache keys. ``True`` by default,
            meaning different methods will not read one another's cache
            results. ``False`` can be useful for certain shared cache
            use cases. More advanced behavior can be produced through
            the *key* argument.
        typed (bool): Whether to factor argument types into the cache
            check. Default ``False``, setting to ``True`` causes the
            cache keys for ``3`` and ``3.0`` to be considered unequal.
        key (callable): A callable with a signature that matches
            :func:`make_cache_key` that returns a tuple of hashable
            values to be used as the key in the cache.

    >>> class Lowerer(object):
    ...     def __init__(self):
    ...         self.cache = LRI()
    ...
    ...     @cachedmethod('cache')
    ...     def lower(self, text):
    ...         return text.lower()
    ...
    >>> lowerer = Lowerer()
    >>> lowerer.lower('WOW WHO COULD GUESS CACHING COULD BE SO NEAT')
    'wow who could guess caching could be so neat'
    >>> len(lowerer.cache)
    1

    """
    def cached_method_decorator(func):
        return CachedMethod(func, cache, scoped=scoped, typed=typed, key=key)
    return cached_method_decorator


class cachedproperty(object):
    """The ``cachedproperty`` is used similarly to :class:`property`, except
    that the wrapped method is only called once. This is commonly used
    to implement lazy attributes.

    After the property has been accessed, the value is stored on the
    instance itself, using the same name as the cachedproperty. This
    allows the cache to be cleared with :func:`delattr`, or through
    manipulating the object's ``__dict__``.
    """
    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.__isabstractmethod__ = getattr(func, '__isabstractmethod__', False)
        self.func = func

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self
        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value

    def __repr__(self):
        cn = self.__class__.__name__
        return '<%s func=%s>' % (cn, self.func)

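# A usage sketch of the compute-once behavior described above (names are
# illustrative, and ``expensive_hash`` is a hypothetical helper):
#
#   class Dataset(object):
#       @cachedproperty
#       def checksum(self):
#           return expensive_hash(self.path)
#
#   ds = Dataset()
#   ds.checksum      # computed once, stored in ds.__dict__['checksum']
#   ds.checksum      # plain attribute lookup; the function is not re-run
#   del ds.checksum  # clears the cached value; next access recomputes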

class ThresholdCounter(object):
    """A **bounded** dict-like Mapping from keys to counts. The
    ThresholdCounter automatically compacts after every (1 /
    *threshold*) additions, maintaining exact counts for any keys
    whose count represents at least a *threshold* ratio of the total
    data. In other words, if a particular key is not present in the
    ThresholdCounter, its count represents less than *threshold* of
    the total data.

    >>> tc = ThresholdCounter(threshold=0.1)
    >>> tc.add(1)
    >>> tc.items()
    [(1, 1)]
    >>> tc.update([2] * 10)
    >>> tc.get(1)
    0
    >>> tc.add(5)
    >>> 5 in tc
    True
    >>> len(list(tc.elements()))
    11

    As you can see above, the API is kept similar to
    :class:`collections.Counter`. The most notable feature omissions
    are that counted items cannot be set directly, uncounted, or
    removed, as this would disrupt the math.

    Use the ThresholdCounter when you need best-effort long-lived
    counts for dynamically-keyed data. Without a bounded datastructure
    such as this one, the dynamic keys often represent a memory leak
    and can impact application reliability. The ThresholdCounter's
    item replacement strategy is fully deterministic and can be
    thought of as *Amortized Least Relevant*. The absolute upper bound
    of keys it will store is *(2/threshold)*, but realistically
    *(1/threshold)* is expected for uniformly random datastreams, and
    one or two orders of magnitude better for real-world data.

    This algorithm is an implementation of the Lossy Counting
    algorithm described in "Approximate Frequency Counts over Data
    Streams" by Manku & Motwani. Hat tip to Kurt Rose for discovery
    and initial implementation.

    """
    # TODO: hit_count/miss_count?
    def __init__(self, threshold=0.001):
        if not 0 < threshold < 1:
            raise ValueError('expected threshold between 0 and 1, not: %r'
                             % threshold)

        self.total = 0
        self._count_map = {}
        self._threshold = threshold
        self._thresh_count = int(1 / threshold)
        self._cur_bucket = 1

    @property
    def threshold(self):
        return self._threshold

    def add(self, key):
        """Increment the count of *key* by 1, automatically adding it if it
        does not exist.

        Cache compaction is triggered every *1/threshold* additions.
        """
        self.total += 1
        try:
            self._count_map[key][0] += 1
        except KeyError:
            self._count_map[key] = [1, self._cur_bucket - 1]

        if self.total % self._thresh_count == 0:
            self._count_map = dict([(k, v) for k, v in self._count_map.items()
                                    if sum(v) > self._cur_bucket])
            self._cur_bucket += 1
        return

    def elements(self):
        """Return an iterator of all the common elements tracked by the
        counter. Yields each key as many times as it has been seen.
        """
        repeaters = itertools.starmap(itertools.repeat, self.iteritems())
        return itertools.chain.from_iterable(repeaters)

    def most_common(self, n=None):
        """Get the top *n* keys and counts as tuples. If *n* is omitted,
        returns all the pairs.
        """
        if n is not None and n <= 0:
            return []
        ret = sorted(self.iteritems(), key=lambda x: x[1], reverse=True)
        if n is None or n >= len(ret):
            return ret
        return ret[:n]

    def get_common_count(self):
        """Get the sum of counts for keys exceeding the configured data
        threshold.
        """
        return sum([count for count, _ in self._count_map.values()])

    def get_uncommon_count(self):
        """Get the sum of counts for keys that were culled because the
        associated counts represented less than the configured
        threshold. The long-tail counts.
        """
        return self.total - self.get_common_count()

    def get_commonality(self):
        """Get a float representation of the effective count accuracy. The
        higher the number, the less uniform the keys being added, and
        the higher the accuracy and efficiency of the ThresholdCounter.

        If a stronger measure of data cardinality is required,
        consider using hyperloglog.
        """
        return float(self.get_common_count()) / self.total

    def __getitem__(self, key):
        return self._count_map[key][0]

    def __len__(self):
        return len(self._count_map)

    def __contains__(self, key):
        return key in self._count_map

    def iterkeys(self):
        return iter(self._count_map)

    def keys(self):
        return list(self.iterkeys())

    def itervalues(self):
        count_map = self._count_map
        for k in count_map:
            yield count_map[k][0]

    def values(self):
        return list(self.itervalues())

    def iteritems(self):
        count_map = self._count_map
        for k in count_map:
            yield (k, count_map[k][0])

    def items(self):
        return list(self.iteritems())

    def get(self, key, default=0):
        "Get count for *key*, defaulting to 0."
        try:
            return self[key]
        except KeyError:
            return default

    def update(self, iterable, **kwargs):
        """Like dict.update(), but adds counts instead of replacing them;
        used to add multiple items in one call.

        Source can be an iterable of keys to add, or a mapping of keys
        to integer counts.
        """
        if iterable is not None:
            if callable(getattr(iterable, 'iteritems', None)):
                for key, count in iterable.iteritems():
                    for i in xrange(count):
                        self.add(key)
            else:
                for key in iterable:
                    self.add(key)
        if kwargs:
            self.update(kwargs)

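# A sketch of counting a long, dynamically-keyed stream (``stream`` is
# hypothetical):
#
#   tc = ThresholdCounter(threshold=0.001)  # keep keys above ~0.1%
#   for key in stream:
#       tc.add(key)
#   top10 = tc.most_common(10)  # [(key, count), ...] pairs
#   tc.get_commonality()        # fraction of adds covered by kept keys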

class MinIDMap(object):
    """
    Assigns arbitrary weakref-able objects the smallest possible unique
    integer IDs, such that no two objects have the same ID at the same
    time.

    Maps arbitrary hashable objects to IDs.

    Based on https://gist.github.com/kurtbrose/25b48114de216a5e55df
    """
    def __init__(self):
        self.mapping = weakref.WeakKeyDictionary()
        self.ref_map = {}
        self.free = []

    def get(self, a):
        try:
            return self.mapping[a][0]  # if object is mapped, return ID
        except KeyError:
            pass

        if self.free:  # if there are any free IDs, use the smallest
            nxt = heapq.heappop(self.free)
        else:  # if there are no free numbers, use the next highest ID
            nxt = len(self.mapping)
        ref = weakref.ref(a, self._clean)
        self.mapping[a] = (nxt, ref)
        self.ref_map[ref] = nxt
        return nxt

    def drop(self, a):
        freed, ref = self.mapping[a]
        del self.mapping[a]
        del self.ref_map[ref]
        heapq.heappush(self.free, freed)

    def _clean(self, ref):
        # weakref callback: when an object is collected, recycle its ID
        heapq.heappush(self.free, self.ref_map[ref])
        del self.ref_map[ref]

    def __contains__(self, a):
        return a in self.mapping

    def __iter__(self):
        return iter(self.mapping)

    def __len__(self):
        return self.mapping.__len__()

    def iteritems(self):
        return iter((k, self.mapping[k][0]) for k in iter(self.mapping))

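# A sketch of ID assignment and reuse (``Obj`` is a stand-in weakref-able
# class):
#
#   class Obj(object): pass
#   idmap = MinIDMap()
#   a, b = Obj(), Obj()
#   idmap.get(a), idmap.get(b)  # -> (0, 1)
#   idmap.drop(a)
#   idmap.get(Obj())            # -> 0; the smallest freed ID is reused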

# end cacheutils.py