""" compat ====== Cross-compatible functions for Python 2 and 3. Key items to import for 2/3 compatible code: * iterators: range(), map(), zip(), filter(), reduce() * lists: lrange(), lmap(), lzip(), lfilter() * unicode: u() [u"" is a syntax error in Python 3.0-3.2] * longs: long (int in Python 3) * callable * iterable method compatibility: iteritems, iterkeys, itervalues * Uses the original method if available, otherwise uses items, keys, values. * types: * text_type: unicode in Python 2, str in Python 3 * binary_type: str in Python 2, bytes in Python 3 * string_types: basestring in Python 2, str in Python 3 * bind_method: binds functions to classes Python 2.6 compatibility: * OrderedDict * Counter Other items: * OrderedDefaultDict """ # pylint disable=W0611 import functools import itertools from distutils.version import LooseVersion from itertools import product import sys import types PY3 = (sys.version_info[0] >= 3) # import iterator versions of these functions try: import __builtin__ as builtins # not writeable when instantiated with string, doesn't handle unicode well from cStringIO import StringIO as cStringIO # always writeable from StringIO import StringIO BytesIO = StringIO import cPickle except ImportError: import builtins from io import StringIO, BytesIO cStringIO = StringIO import pickle as cPickle if PY3: def isidentifier(s): return s.isidentifier() def str_to_bytes(s, encoding='ascii'): return s.encode(encoding) def bytes_to_str(b, encoding='utf-8'): return b.decode(encoding) # have to explicitly put builtins into the namespace range = range map = map zip = zip filter = filter reduce = functools.reduce long = int unichr = chr # list-producing versions of the major Python iterating functions def lrange(*args, **kwargs): return list(range(*args, **kwargs)) def lzip(*args, **kwargs): return list(zip(*args, **kwargs)) def lmap(*args, **kwargs): return list(map(*args, **kwargs)) def lfilter(*args, **kwargs): return list(filter(*args, **kwargs)) else: # Python 2 import re _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") def isidentifier(s, dotted=False): return bool(_name_re.match(s)) def str_to_bytes(s, encoding='ascii'): return s def bytes_to_str(b, encoding='ascii'): return b range = xrange zip = itertools.izip filter = itertools.ifilter map = itertools.imap reduce = reduce long = long unichr = unichr # Python 2-builtin ranges produce lists lrange = builtins.range lzip = builtins.zip lmap = builtins.map lfilter = builtins.filter def iteritems(obj, **kwargs): """replacement for six's iteritems for Python2/3 compat uses 'iteritems' if available and otherwise uses 'items'. Passes kwargs to method.""" func = getattr(obj, "iteritems", None) if not func: func = obj.items return func(**kwargs) def iterkeys(obj, **kwargs): func = getattr(obj, "iterkeys", None) if not func: func = obj.keys return func(**kwargs) def itervalues(obj, **kwargs): func = getattr(obj, "itervalues", None) if not func: func = obj.values return func(**kwargs) def bind_method(cls, name, func): """Bind a method to class, python 2 and python 3 compatible. Parameters ---------- cls : type class to receive bound method name : basestring name of method on class instance func : function function to be bound as method Returns ------- None """ # only python 2 has bound/unbound method issue if not PY3: setattr(cls, name, types.MethodType(func, None, cls)) else: setattr(cls, name, func) # ---------------------------------------------------------------------------- # functions largely based / taken from the six module # Much of the code in this module comes from Benjamin Peterson's six library. # The license for this library can be found in LICENSES/SIX and the code can be # found at https://bitbucket.org/gutworth/six if PY3: string_types = str, integer_types = int, class_types = type, text_type = str binary_type = bytes def u(s): return s else: string_types = basestring, integer_types = (int, long) class_types = (type, types.ClassType) text_type = unicode binary_type = str def u(s): return unicode(s, "unicode_escape") string_and_binary_types = string_types + (binary_type,) try: # callable reintroduced in later versions of Python callable = callable except NameError: def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) # ---------------------------------------------------------------------------- # Python 2.6 compatibility shims # # OrderedDict Shim from Raymond Hettinger, python core dev # http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ # here to support versions before 2.6 if not PY3: # don't need this except in 2.6 try: from thread import get_ident as _get_ident except ImportError: from dummy_thread import get_ident as _get_ident try: from _abcoll import KeysView, ValuesView, ItemsView except ImportError: pass class _OrderedDict(dict): 'Dictionary that remembers insertion order' # An inherited dict maps keys to values. # The inherited dict provides __getitem__, __len__, __contains__, and get. # The remaining methods are order-aware. # Big-O running times for all methods are the same as for regular # dictionaries. # The internal self.__map dictionary maps keys to links in a doubly linked # list. The circular doubly linked list starts and ends with a sentinel # element. The sentinel element never gets deleted (this simplifies the # algorithm). Each link is stored as a list of length three: [PREV, NEXT, # KEY]. def __init__(self, *args, **kwds): '''Initialize an ordered dictionary. Signature is the same as for regular dictionaries, but keyword arguments are not recommended because their insertion order is arbitrary. ''' if len(args) > 1: raise TypeError('expected at most 1 arguments, got %d' % len(args)) try: self.__root except AttributeError: self.__root = root = [] # sentinel node root[:] = [root, root, None] self.__map = {} self.__update(*args, **kwds) def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 'od.__setitem__(i, y) <==> od[i]=y' # Setting a new item creates a new link which goes at the end of the # linked list, and the inherited dictionary is updated with the new # key/value pair. if key not in self: root = self.__root last = root[0] last[1] = root[0] = self.__map[key] = [last, root, key] dict_setitem(self, key, value) def __delitem__(self, key, dict_delitem=dict.__delitem__): 'od.__delitem__(y) <==> del od[y]' # Deleting an existing item uses self.__map to find the link which is # then removed by updating the links in the predecessor and successor # nodes. dict_delitem(self, key) link_prev, link_next, key = self.__map.pop(key) link_prev[1] = link_next link_next[0] = link_prev def __iter__(self): 'od.__iter__() <==> iter(od)' root = self.__root curr = root[1] while curr is not root: yield curr[2] curr = curr[1] def __reversed__(self): 'od.__reversed__() <==> reversed(od)' root = self.__root curr = root[0] while curr is not root: yield curr[2] curr = curr[0] def clear(self): 'od.clear() -> None. Remove all items from od.' try: for node in itervalues(self.__map): del node[:] root = self.__root root[:] = [root, root, None] self.__map.clear() except AttributeError: pass dict.clear(self) def popitem(self, last=True): '''od.popitem() -> (k, v), return and remove a (key, value) pair. Pairs are returned in LIFO order if last is true or FIFO order if false. ''' if not self: raise KeyError('dictionary is empty') root = self.__root if last: link = root[0] link_prev = link[0] link_prev[1] = root root[0] = link_prev else: link = root[1] link_next = link[1] root[1] = link_next link_next[0] = root key = link[2] del self.__map[key] value = dict.pop(self, key) return key, value # -- the following methods do not depend on the internal structure -- def keys(self): 'od.keys() -> list of keys in od' return list(self) def values(self): 'od.values() -> list of values in od' return [self[key] for key in self] def items(self): 'od.items() -> list of (key, value) pairs in od' return [(key, self[key]) for key in self] def iterkeys(self): 'od.iterkeys() -> an iterator over the keys in od' return iter(self) def itervalues(self): 'od.itervalues -> an iterator over the values in od' for k in self: yield self[k] def iteritems(self): 'od.iteritems -> an iterator over the (key, value) items in od' for k in self: yield (k, self[k]) def update(*args, **kwds): '''od.update(E, **F) -> None. Update od from dict/iterable E and F. If E is a dict instance, does: for k in E: od[k] = E[k] If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] Or if E is an iterable of items, does:for k, v in E: od[k] = v In either case, this is followed by: for k, v in F.items(): od[k] = v ''' if len(args) > 2: raise TypeError('update() takes at most 2 positional ' 'arguments (%d given)' % (len(args),)) elif not args: raise TypeError('update() takes at least 1 argument (0 given)') self = args[0] # Make progressively weaker assumptions about "other" other = () if len(args) == 2: other = args[1] if isinstance(other, dict): for key in other: self[key] = other[key] elif hasattr(other, 'keys'): for key in other.keys(): self[key] = other[key] else: for key, value in other: self[key] = value for key, value in kwds.items(): self[key] = value # let subclasses override update without breaking __init__ __update = update __marker = object() def pop(self, key, default=__marker): '''od.pop(k[,d]) -> v, remove specified key and return the\ corresponding value. If key is not found, d is returned if given, otherwise KeyError is raised. ''' if key in self: result = self[key] del self[key] return result if default is self.__marker: raise KeyError(key) return default def setdefault(self, key, default=None): 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' if key in self: return self[key] self[key] = default return default def __repr__(self, _repr_running={}): 'od.__repr__() <==> repr(od)' call_key = id(self), _get_ident() if call_key in _repr_running: return '...' _repr_running[call_key] = 1 try: if not self: return '%s()' % (self.__class__.__name__,) return '%s(%r)' % (self.__class__.__name__, list(self.items())) finally: del _repr_running[call_key] def __reduce__(self): 'Return state information for pickling' items = [[k, self[k]] for k in self] inst_dict = vars(self).copy() for k in vars(OrderedDict()): inst_dict.pop(k, None) if inst_dict: return (self.__class__, (items,), inst_dict) return self.__class__, (items,) def copy(self): 'od.copy() -> a shallow copy of od' return self.__class__(self) @classmethod def fromkeys(cls, iterable, value=None): '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and values equal to v (which defaults to None). ''' d = cls() for key in iterable: d[key] = value return d def __eq__(self, other): '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive while comparison to a regular mapping is order-insensitive. ''' if isinstance(other, OrderedDict): return (len(self) == len(other) and list(self.items()) == list(other.items())) return dict.__eq__(self, other) def __ne__(self, other): return not self == other # -- the following methods are only used in Python 2.7 -- def viewkeys(self): "od.viewkeys() -> a set-like object providing a view on od's keys" return KeysView(self) def viewvalues(self): "od.viewvalues() -> an object providing a view on od's values" return ValuesView(self) def viewitems(self): "od.viewitems() -> a set-like object providing a view on od's items" return ItemsView(self) # {{{ http://code.activestate.com/recipes/576611/ (r11) try: from operator import itemgetter from heapq import nlargest except ImportError: pass class _Counter(dict): '''Dict subclass for counting hashable objects. Sometimes called a bag or multiset. Elements are stored as dictionary keys and their counts are stored as dictionary values. >>> Counter('zyzygy') Counter({'y': 3, 'z': 2, 'g': 1}) ''' def __init__(self, iterable=None, **kwds): '''Create a new, empty Counter object. And if given, count elements from an input iterable. Or, initialize the count from another mapping of elements to their counts. >>> c = Counter() # a new, empty counter >>> c = Counter('gallahad') # a new counter from an iterable >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping >>> c = Counter(a=4, b=2) # a new counter from keyword args ''' self.update(iterable, **kwds) def __missing__(self, key): return 0 def most_common(self, n=None): '''List the n most common elements and their counts from the most common to the least. If n is None, then list all element counts. >>> Counter('abracadabra').most_common(3) [('a', 5), ('r', 2), ('b', 2)] ''' if n is None: return sorted(iteritems(self), key=itemgetter(1), reverse=True) return nlargest(n, iteritems(self), key=itemgetter(1)) def elements(self): '''Iterator over elements repeating each as many times as its count. >>> c = Counter('ABCABC') >>> sorted(c.elements()) ['A', 'A', 'B', 'B', 'C', 'C'] If an element's count has been set to zero or is a negative number, elements() will ignore it. ''' for elem, count in iteritems(self): for _ in range(count): yield elem # Override dict methods where the meaning changes for Counter objects. @classmethod def fromkeys(cls, iterable, v=None): raise NotImplementedError( 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') def update(self, iterable=None, **kwds): '''Like dict.update() but add counts instead of replacing them. Source can be an iterable, a dictionary, or another Counter instance. >>> c = Counter('which') >>> c.update('witch') # add elements from another iterable >>> d = Counter('watch') >>> c.update(d) # add elements from another counter >>> c['h'] # four 'h' in which, witch, and watch 4 ''' if iterable is not None: if hasattr(iterable, 'iteritems'): if self: self_get = self.get for elem, count in iteritems(iterable): self[elem] = self_get(elem, 0) + count else: dict.update( self, iterable) # fast path when counter is empty else: self_get = self.get for elem in iterable: self[elem] = self_get(elem, 0) + 1 if kwds: self.update(kwds) def copy(self): 'Like dict.copy() but returns a Counter instance instead of a dict.' return Counter(self) def __delitem__(self, elem): '''Like dict.__delitem__() but does not raise KeyError for missing values.''' if elem in self: dict.__delitem__(self, elem) def __repr__(self): if not self: return '%s()' % self.__class__.__name__ items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items) # Multiset-style mathematical operations discussed in: # Knuth TAOCP Volume II section 4.6.3 exercise 19 # and at http://en.wikipedia.org/wiki/Multiset # # Outputs guaranteed to only include positive counts. # # To strip negative and zero counts, add-in an empty counter: # c += Counter() def __add__(self, other): '''Add counts from two counters. >>> Counter('abbb') + Counter('bcc') Counter({'b': 4, 'c': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem in set(self) | set(other): newcount = self[elem] + other[elem] if newcount > 0: result[elem] = newcount return result def __sub__(self, other): ''' Subtract count, but keep only results with positive counts. >>> Counter('abbbc') - Counter('bccd') Counter({'b': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem in set(self) | set(other): newcount = self[elem] - other[elem] if newcount > 0: result[elem] = newcount return result def __or__(self, other): '''Union is the maximum of value in either of the input counters. >>> Counter('abbb') | Counter('bcc') Counter({'b': 3, 'c': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented _max = max result = Counter() for elem in set(self) | set(other): newcount = _max(self[elem], other[elem]) if newcount > 0: result[elem] = newcount return result def __and__(self, other): ''' Intersection is the minimum of corresponding counts. >>> Counter('abbb') & Counter('bcc') Counter({'b': 1}) ''' if not isinstance(other, Counter): return NotImplemented _min = min result = Counter() if len(self) < len(other): self, other = other, self for elem in filter(self.__contains__, other): newcount = _min(self[elem], other[elem]) if newcount > 0: result[elem] = newcount return result if sys.version_info[:2] < (2, 7): OrderedDict = _OrderedDict Counter = _Counter else: from collections import OrderedDict, Counter # http://stackoverflow.com/questions/4126348 # Thanks to @martineau at SO from dateutil import parser as _date_parser import dateutil if LooseVersion(dateutil.__version__) < '2.0': @functools.wraps(_date_parser.parse) def parse_date(timestr, *args, **kwargs): timestr = bytes(timestr) return _date_parser.parse(timestr, *args, **kwargs) else: parse_date = _date_parser.parse class OrderedDefaultdict(OrderedDict): def __init__(self, *args, **kwargs): newdefault = None newargs = () if args: newdefault = args[0] if not (newdefault is None or callable(newdefault)): raise TypeError('first argument must be callable or None') newargs = args[1:] self.default_factory = newdefault super(self.__class__, self).__init__(*newargs, **kwargs) def __missing__(self, key): if self.default_factory is None: raise KeyError(key) self[key] = value = self.default_factory() return value def __reduce__(self): # optional, for pickle support args = self.default_factory if self.default_factory else tuple() return type(self), args, None, None, list(self.items())