diff options
Diffstat (limited to 'Python/Dependencies/future-0.18.2/src/future/backports/urllib/parse.py')
| -rw-r--r-- | Python/Dependencies/future-0.18.2/src/future/backports/urllib/parse.py | 991 |
1 files changed, 0 insertions, 991 deletions
diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/urllib/parse.py b/Python/Dependencies/future-0.18.2/src/future/backports/urllib/parse.py deleted file mode 100644 index 04e52d4..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/urllib/parse.py +++ /dev/null @@ -1,991 +0,0 @@ -""" -Ported using Python-Future from the Python 3.3 standard library. - -Parse (absolute and relative) URLs. - -urlparse module is based upon the following RFC specifications. - -RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding -and L. Masinter, January 2005. - -RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter -and L.Masinter, December 1999. - -RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. -Berners-Lee, R. Fielding, and L. Masinter, August 1998. - -RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. - -RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June -1995. - -RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. -McCahill, December 1994 - -RFC 3986 is considered the current standard and any future changes to -urlparse module should conform with it. The urlparse module is -currently not entirely compliant with this RFC due to defacto -scenarios for parsing, and for backward compatibility purposes, some -parsing quirks from older RFCs are retained. The testcases in -test_urlparse.py provides a good indicator of parsing behavior. -""" -from __future__ import absolute_import, division, unicode_literals -from future.builtins import bytes, chr, dict, int, range, str -from future.utils import raise_with_traceback - -import re -import sys -import collections - -__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit", "urlencode", "parse_qs", - "parse_qsl", "quote", "quote_plus", "quote_from_bytes", - "unquote", "unquote_plus", "unquote_to_bytes"] - -# A classification of schemes ('' means apply by default) -uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp', - 'svn', 'svn+ssh'] -uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', - 'imap', 'wais', 'file', 'mms', 'https', 'shttp', - 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', - 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] -uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', - 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', - 'mms', '', 'sftp', 'tel'] - -# These are not actually used anymore, but should stay for backwards -# compatibility. (They are undocumented, but have a public-looking name.) -non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] - -# Characters valid in scheme names -scheme_chars = ('abcdefghijklmnopqrstuvwxyz' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - '0123456789' - '+-.') - -# XXX: Consider replacing with functools.lru_cache -MAX_CACHE_SIZE = 20 -_parse_cache = {} - -def clear_cache(): - """Clear the parse cache and the quoters cache.""" - _parse_cache.clear() - _safe_quoters.clear() - - -# Helpers for bytes handling -# For 3.2, we deliberately require applications that -# handle improperly quoted URLs to do their own -# decoding and encoding. If valid use cases are -# presented, we may relax this by using latin-1 -# decoding internally for 3.3 -_implicit_encoding = 'ascii' -_implicit_errors = 'strict' - -def _noop(obj): - return obj - -def _encode_result(obj, encoding=_implicit_encoding, - errors=_implicit_errors): - return obj.encode(encoding, errors) - -def _decode_args(args, encoding=_implicit_encoding, - errors=_implicit_errors): - return tuple(x.decode(encoding, errors) if x else '' for x in args) - -def _coerce_args(*args): - # Invokes decode if necessary to create str args - # and returns the coerced inputs along with - # an appropriate result coercion function - # - noop for str inputs - # - encoding function otherwise - str_input = isinstance(args[0], str) - for arg in args[1:]: - # We special-case the empty string to support the - # "scheme=''" default argument to some functions - if arg and isinstance(arg, str) != str_input: - raise TypeError("Cannot mix str and non-str arguments") - if str_input: - return args + (_noop,) - return _decode_args(args) + (_encode_result,) - -# Result objects are more helpful than simple tuples -class _ResultMixinStr(object): - """Standard approach to encoding parsed results from str to bytes""" - __slots__ = () - - def encode(self, encoding='ascii', errors='strict'): - return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) - - -class _ResultMixinBytes(object): - """Standard approach to decoding parsed results from bytes to str""" - __slots__ = () - - def decode(self, encoding='ascii', errors='strict'): - return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) - - -class _NetlocResultMixinBase(object): - """Shared methods for the parsed result objects containing a netloc element""" - __slots__ = () - - @property - def username(self): - return self._userinfo[0] - - @property - def password(self): - return self._userinfo[1] - - @property - def hostname(self): - hostname = self._hostinfo[0] - if not hostname: - hostname = None - elif hostname is not None: - hostname = hostname.lower() - return hostname - - @property - def port(self): - port = self._hostinfo[1] - if port is not None: - port = int(port, 10) - # Return None on an illegal port - if not ( 0 <= port <= 65535): - return None - return port - - -class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): - __slots__ = () - - @property - def _userinfo(self): - netloc = self.netloc - userinfo, have_info, hostinfo = netloc.rpartition('@') - if have_info: - username, have_password, password = userinfo.partition(':') - if not have_password: - password = None - else: - username = password = None - return username, password - - @property - def _hostinfo(self): - netloc = self.netloc - _, _, hostinfo = netloc.rpartition('@') - _, have_open_br, bracketed = hostinfo.partition('[') - if have_open_br: - hostname, _, port = bracketed.partition(']') - _, have_port, port = port.partition(':') - else: - hostname, have_port, port = hostinfo.partition(':') - if not have_port: - port = None - return hostname, port - - -class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): - __slots__ = () - - @property - def _userinfo(self): - netloc = self.netloc - userinfo, have_info, hostinfo = netloc.rpartition(b'@') - if have_info: - username, have_password, password = userinfo.partition(b':') - if not have_password: - password = None - else: - username = password = None - return username, password - - @property - def _hostinfo(self): - netloc = self.netloc - _, _, hostinfo = netloc.rpartition(b'@') - _, have_open_br, bracketed = hostinfo.partition(b'[') - if have_open_br: - hostname, _, port = bracketed.partition(b']') - _, have_port, port = port.partition(b':') - else: - hostname, have_port, port = hostinfo.partition(b':') - if not have_port: - port = None - return hostname, port - - -from collections import namedtuple - -_DefragResultBase = namedtuple('DefragResult', 'url fragment') -_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') -_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') - -# For backwards compatibility, alias _NetlocResultMixinStr -# ResultBase is no longer part of the documented API, but it is -# retained since deprecating it isn't worth the hassle -ResultBase = _NetlocResultMixinStr - -# Structured result objects for string data -class DefragResult(_DefragResultBase, _ResultMixinStr): - __slots__ = () - def geturl(self): - if self.fragment: - return self.url + '#' + self.fragment - else: - return self.url - -class SplitResult(_SplitResultBase, _NetlocResultMixinStr): - __slots__ = () - def geturl(self): - return urlunsplit(self) - -class ParseResult(_ParseResultBase, _NetlocResultMixinStr): - __slots__ = () - def geturl(self): - return urlunparse(self) - -# Structured result objects for bytes data -class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): - __slots__ = () - def geturl(self): - if self.fragment: - return self.url + b'#' + self.fragment - else: - return self.url - -class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): - __slots__ = () - def geturl(self): - return urlunsplit(self) - -class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): - __slots__ = () - def geturl(self): - return urlunparse(self) - -# Set up the encode/decode result pairs -def _fix_result_transcoding(): - _result_pairs = ( - (DefragResult, DefragResultBytes), - (SplitResult, SplitResultBytes), - (ParseResult, ParseResultBytes), - ) - for _decoded, _encoded in _result_pairs: - _decoded._encoded_counterpart = _encoded - _encoded._decoded_counterpart = _decoded - -_fix_result_transcoding() -del _fix_result_transcoding - -def urlparse(url, scheme='', allow_fragments=True): - """Parse a URL into 6 components: - <scheme>://<netloc>/<path>;<params>?<query>#<fragment> - Return a 6-tuple: (scheme, netloc, path, params, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. netloc is a single string) and we don't expand % escapes.""" - url, scheme, _coerce_result = _coerce_args(url, scheme) - splitresult = urlsplit(url, scheme, allow_fragments) - scheme, netloc, url, query, fragment = splitresult - if scheme in uses_params and ';' in url: - url, params = _splitparams(url) - else: - params = '' - result = ParseResult(scheme, netloc, url, params, query, fragment) - return _coerce_result(result) - -def _splitparams(url): - if '/' in url: - i = url.find(';', url.rfind('/')) - if i < 0: - return url, '' - else: - i = url.find(';') - return url[:i], url[i+1:] - -def _splitnetloc(url, start=0): - delim = len(url) # position of end of domain part of url, default is end - for c in '/?#': # look for delimiters; the order is NOT important - wdelim = url.find(c, start) # find first of this delim - if wdelim >= 0: # if found - delim = min(delim, wdelim) # use earliest delim position - return url[start:delim], url[delim:] # return (domain, rest) - -def urlsplit(url, scheme='', allow_fragments=True): - """Parse a URL into 5 components: - <scheme>://<netloc>/<path>?<query>#<fragment> - Return a 5-tuple: (scheme, netloc, path, query, fragment). - Note that we don't break the components up in smaller bits - (e.g. netloc is a single string) and we don't expand % escapes.""" - url, scheme, _coerce_result = _coerce_args(url, scheme) - allow_fragments = bool(allow_fragments) - key = url, scheme, allow_fragments, type(url), type(scheme) - cached = _parse_cache.get(key, None) - if cached: - return _coerce_result(cached) - if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth - clear_cache() - netloc = query = fragment = '' - i = url.find(':') - if i > 0: - if url[:i] == 'http': # optimize the common case - scheme = url[:i].lower() - url = url[i+1:] - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return _coerce_result(v) - for c in url[:i]: - if c not in scheme_chars: - break - else: - # make sure "url" is not actually a port number (in which case - # "scheme" is really part of the path) - rest = url[i+1:] - if not rest or any(c not in '0123456789' for c in rest): - # not a port number - scheme, url = url[:i].lower(), rest - - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v - return _coerce_result(v) - -def urlunparse(components): - """Put a parsed URL back together again. This may result in a - slightly different, but equivalent URL, if the URL that was parsed - originally had redundant delimiters, e.g. a ? with an empty query - (the draft states that these are equivalent).""" - scheme, netloc, url, params, query, fragment, _coerce_result = ( - _coerce_args(*components)) - if params: - url = "%s;%s" % (url, params) - return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) - -def urlunsplit(components): - """Combine the elements of a tuple as returned by urlsplit() into a - complete URL as a string. The data argument can be any five-item iterable. - This may result in a slightly different, but equivalent URL, if the URL that - was parsed originally had unnecessary delimiters (for example, a ? with an - empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment, _coerce_result = ( - _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): - if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url - if scheme: - url = scheme + ':' + url - if query: - url = url + '?' + query - if fragment: - url = url + '#' + fragment - return _coerce_result(url) - -def urljoin(base, url, allow_fragments=True): - """Join a base URL and a possibly relative URL to form an absolute - interpretation of the latter.""" - if not base: - return url - if not url: - return base - base, url, _coerce_result = _coerce_args(base, url) - bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ - urlparse(base, '', allow_fragments) - scheme, netloc, path, params, query, fragment = \ - urlparse(url, bscheme, allow_fragments) - if scheme != bscheme or scheme not in uses_relative: - return _coerce_result(url) - if scheme in uses_netloc: - if netloc: - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - netloc = bnetloc - if path[:1] == '/': - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - if not path and not params: - path = bpath - params = bparams - if not query: - query = bquery - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - segments = bpath.split('/')[:-1] + path.split('/') - # XXX The stuff below is bogus in various ways... - if segments[-1] == '.': - segments[-1] = '' - while '.' in segments: - segments.remove('.') - while 1: - i = 1 - n = len(segments) - 1 - while i < n: - if (segments[i] == '..' - and segments[i-1] not in ('', '..')): - del segments[i-1:i+1] - break - i = i+1 - else: - break - if segments == ['', '..']: - segments[-1] = '' - elif len(segments) >= 2 and segments[-1] == '..': - segments[-2:] = [''] - return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), - params, query, fragment))) - -def urldefrag(url): - """Removes any existing fragment from URL. - - Returns a tuple of the defragmented URL and the fragment. If - the URL contained no fragments, the second element is the - empty string. - """ - url, _coerce_result = _coerce_args(url) - if '#' in url: - s, n, p, a, q, frag = urlparse(url) - defrag = urlunparse((s, n, p, a, q, '')) - else: - frag = '' - defrag = url - return _coerce_result(DefragResult(defrag, frag)) - -_hexdig = '0123456789ABCDEFabcdef' -_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)])) - for a in _hexdig for b in _hexdig) - -def unquote_to_bytes(string): - """unquote_to_bytes('abc%20def') -> b'abc def'.""" - # Note: strings are encoded as UTF-8. This is only an issue if it contains - # unescaped non-ASCII characters, which URIs should not. - if not string: - # Is it a string-like object? - string.split - return bytes(b'') - if isinstance(string, str): - string = string.encode('utf-8') - ### For Python-Future: - # It is already a byte-string object, but force it to be newbytes here on - # Py2: - string = bytes(string) - ### - bits = string.split(b'%') - if len(bits) == 1: - return string - res = [bits[0]] - append = res.append - for item in bits[1:]: - try: - append(_hextobyte[item[:2]]) - append(item[2:]) - except KeyError: - append(b'%') - append(item) - return bytes(b'').join(res) - -_asciire = re.compile('([\x00-\x7f]+)') - -def unquote(string, encoding='utf-8', errors='replace'): - """Replace %xx escapes by their single-character equivalent. The optional - encoding and errors parameters specify how to decode percent-encoded - sequences into Unicode characters, as accepted by the bytes.decode() - method. - By default, percent-encoded sequences are decoded with UTF-8, and invalid - sequences are replaced by a placeholder character. - - unquote('abc%20def') -> 'abc def'. - """ - if '%' not in string: - string.split - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - bits = _asciire.split(string) - res = [bits[0]] - append = res.append - for i in range(1, len(bits), 2): - append(unquote_to_bytes(bits[i]).decode(encoding, errors)) - append(bits[i + 1]) - return ''.join(res) - -def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - encoding and errors: specify how to decode percent-encoded sequences - into Unicode characters, as accepted by the bytes.decode() method. - """ - parsed_result = {} - pairs = parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - -def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - """Parse a query given as a string argument. - - Arguments: - - qs: percent-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - percent-encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - encoding and errors: specify how to decode percent-encoded sequences - into Unicode characters, as accepted by the bytes.decode() method. - - Returns a list, as G-d intended. - """ - qs, _coerce_result = _coerce_args(qs) - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError("bad query field: %r" % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = unquote(name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = unquote(value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - -def unquote_plus(string, encoding='utf-8', errors='replace'): - """Like unquote(), but also replace plus signs by spaces, as required for - unquoting HTML form values. - - unquote_plus('%7e/abc+def') -> '~/abc def' - """ - string = string.replace('+', ' ') - return unquote(string, encoding, errors) - -_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - b'abcdefghijklmnopqrstuvwxyz' - b'0123456789' - b'_.-')) -_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -_safe_quoters = {} - -class Quoter(collections.defaultdict): - """A mapping from bytes (in range(0,256)) to strings. - - String values are percent-encoded byte values, unless the key < 128, and - in the "safe" set (either the specified safe set, or default set). - """ - # Keeps a cache internally, using defaultdict, for efficiency (lookups - # of cached keys don't call Python code at all). - def __init__(self, safe): - """safe: bytes object.""" - self.safe = _ALWAYS_SAFE.union(bytes(safe)) - - def __repr__(self): - # Without this, will just display as a defaultdict - return "<Quoter %r>" % dict(self) - - def __missing__(self, b): - # Handle a cache miss. Store quoted string in cache and return. - res = chr(b) if b in self.safe else '%{0:02X}'.format(b) - self[b] = res - return res - -def quote(string, safe='/', encoding=None, errors=None): - """quote('abc def') -> 'abc%20def' - - Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. - - RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists - the following reserved characters. - - reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | - "$" | "," - - Each of these characters is reserved in some component of a URL, - but not necessarily in all of them. - - By default, the quote function is intended for quoting the path - section of a URL. Thus, it will not encode '/'. This character - is reserved, but in typical usage the quote function is being - called on a path where the existing slash characters are used as - reserved characters. - - string and safe may be either str or bytes objects. encoding must - not be specified if string is a str. - - The optional encoding and errors parameters specify how to deal with - non-ASCII characters, as accepted by the str.encode method. - By default, encoding='utf-8' (characters are encoded with UTF-8), and - errors='strict' (unsupported characters raise a UnicodeEncodeError). - """ - if isinstance(string, str): - if not string: - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'strict' - string = string.encode(encoding, errors) - else: - if encoding is not None: - raise TypeError("quote() doesn't support 'encoding' for bytes") - if errors is not None: - raise TypeError("quote() doesn't support 'errors' for bytes") - return quote_from_bytes(string, safe) - -def quote_plus(string, safe='', encoding=None, errors=None): - """Like quote(), but also replace ' ' with '+', as required for quoting - HTML form values. Plus signs in the original string are escaped unless - they are included in safe. It also does not have safe default to '/'. - """ - # Check if ' ' in string, where string may either be a str or bytes. If - # there are no spaces, the regular quote will produce the right answer. - if ((isinstance(string, str) and ' ' not in string) or - (isinstance(string, bytes) and b' ' not in string)): - return quote(string, safe, encoding, errors) - if isinstance(safe, str): - space = str(' ') - else: - space = bytes(b' ') - string = quote(string, safe + space, encoding, errors) - return string.replace(' ', '+') - -def quote_from_bytes(bs, safe='/'): - """Like quote(), but accepts a bytes object rather than a str, and does - not perform string-to-bytes encoding. It always returns an ASCII string. - quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' - """ - if not isinstance(bs, (bytes, bytearray)): - raise TypeError("quote_from_bytes() expected bytes") - if not bs: - return str('') - ### For Python-Future: - bs = bytes(bs) - ### - if isinstance(safe, str): - # Normalize 'safe' by converting to bytes and removing non-ASCII chars - safe = str(safe).encode('ascii', 'ignore') - else: - ### For Python-Future: - safe = bytes(safe) - ### - safe = bytes([c for c in safe if c < 128]) - if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): - return bs.decode() - try: - quoter = _safe_quoters[safe] - except KeyError: - _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ - return str('').join([quoter(char) for char in bs]) - -def urlencode(query, doseq=False, safe='', encoding=None, errors=None): - """Encode a sequence of two-element tuples or dictionary into a URL query string. - - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. - - If the query arg is a sequence of two-element tuples, the order of the - parameters in the output will match the order of parameters in the - input. - - The query arg may be either a string or a bytes type. When query arg is a - string, the safe, encoding and error parameters are sent the quote_plus for - encoding. - """ - - if hasattr(query, "items"): - query = query.items() - else: - # It's a bother at times that strings and string-like objects are - # sequences. - try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # Zero-length sequences of all types will get here and succeed, - # but that's a minor nit. Since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty, va, tb = sys.exc_info() - raise_with_traceback(TypeError("not a valid non-string sequence " - "or mapping object"), tb) - - l = [] - if not doseq: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) - - if isinstance(v, bytes): - v = quote_plus(v, safe) - else: - v = quote_plus(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) - - if isinstance(v, bytes): - v = quote_plus(v, safe) - l.append(k + '=' + v) - elif isinstance(v, str): - v = quote_plus(v, safe, encoding, errors) - l.append(k + '=' + v) - else: - try: - # Is this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - # loop over the sequence - for elt in v: - if isinstance(elt, bytes): - elt = quote_plus(elt, safe) - else: - elt = quote_plus(str(elt), safe, encoding, errors) - l.append(k + '=' + elt) - return str('&').join(l) - -# Utilities to parse URLs (most of these return None for missing parts): -# unwrap('<URL:type://host/path>') --> 'type://host/path' -# splittype('type:opaquestring') --> 'type', 'opaquestring' -# splithost('//host[:port]/path') --> 'host[:port]', '/path' -# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' -# splitpasswd('user:passwd') -> 'user', 'passwd' -# splitport('host:port') --> 'host', 'port' -# splitquery('/path?query') --> '/path', 'query' -# splittag('/path#tag') --> '/path', 'tag' -# splitattr('/path;attr1=value1;attr2=value2;...') -> -# '/path', ['attr1=value1', 'attr2=value2', ...] -# splitvalue('attr=value') --> 'attr', 'value' -# urllib.parse.unquote('abc%20def') -> 'abc def' -# quote('abc def') -> 'abc%20def') - -def to_bytes(url): - """to_bytes(u"URL") --> 'URL'.""" - # Most URL schemes require ASCII. If that changes, the conversion - # can be relaxed. - # XXX get rid of to_bytes() - if isinstance(url, str): - try: - url = url.encode("ASCII").decode() - except UnicodeError: - raise UnicodeError("URL " + repr(url) + - " contains non-ASCII characters") - return url - -def unwrap(url): - """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" - url = str(url).strip() - if url[:1] == '<' and url[-1:] == '>': - url = url[1:-1].strip() - if url[:4] == 'URL:': url = url[4:].strip() - return url - -_typeprog = None -def splittype(url): - """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" - global _typeprog - if _typeprog is None: - import re - _typeprog = re.compile('^([^/:]+):') - - match = _typeprog.match(url) - if match: - scheme = match.group(1) - return scheme.lower(), url[len(scheme) + 1:] - return None, url - -_hostprog = None -def splithost(url): - """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" - global _hostprog - if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') - - match = _hostprog.match(url) - if match: - host_port = match.group(1) - path = match.group(2) - if path and not path.startswith('/'): - path = '/' + path - return host_port, path - return None, url - -_userprog = None -def splituser(host): - """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" - global _userprog - if _userprog is None: - import re - _userprog = re.compile('^(.*)@(.*)$') - - match = _userprog.match(host) - if match: return match.group(1, 2) - return None, host - -_passwdprog = None -def splitpasswd(user): - """splitpasswd('user:passwd') -> 'user', 'passwd'.""" - global _passwdprog - if _passwdprog is None: - import re - _passwdprog = re.compile('^([^:]*):(.*)$',re.S) - - match = _passwdprog.match(user) - if match: return match.group(1, 2) - return user, None - -# splittag('/path#tag') --> '/path', 'tag' -_portprog = None -def splitport(host): - """splitport('host:port') --> 'host', 'port'.""" - global _portprog - if _portprog is None: - import re - _portprog = re.compile('^(.*):([0-9]+)$') - - match = _portprog.match(host) - if match: return match.group(1, 2) - return host, None - -_nportprog = None -def splitnport(host, defport=-1): - """Split host and port, returning numeric port. - Return given default port if no ':' found; defaults to -1. - Return numerical port if a valid number are found after ':'. - Return None if ':' but not a valid number.""" - global _nportprog - if _nportprog is None: - import re - _nportprog = re.compile('^(.*):(.*)$') - - match = _nportprog.match(host) - if match: - host, port = match.group(1, 2) - try: - if not port: raise ValueError("no digits") - nport = int(port) - except ValueError: - nport = None - return host, nport - return host, defport - -_queryprog = None -def splitquery(url): - """splitquery('/path?query') --> '/path', 'query'.""" - global _queryprog - if _queryprog is None: - import re - _queryprog = re.compile('^(.*)\?([^?]*)$') - - match = _queryprog.match(url) - if match: return match.group(1, 2) - return url, None - -_tagprog = None -def splittag(url): - """splittag('/path#tag') --> '/path', 'tag'.""" - global _tagprog - if _tagprog is None: - import re - _tagprog = re.compile('^(.*)#([^#]*)$') - - match = _tagprog.match(url) - if match: return match.group(1, 2) - return url, None - -def splitattr(url): - """splitattr('/path;attr1=value1;attr2=value2;...') -> - '/path', ['attr1=value1', 'attr2=value2', ...].""" - words = url.split(';') - return words[0], words[1:] - -_valueprog = None -def splitvalue(attr): - """splitvalue('attr=value') --> 'attr', 'value'.""" - global _valueprog - if _valueprog is None: - import re - _valueprog = re.compile('^([^=]*)=(.*)$') - - match = _valueprog.match(attr) - if match: return match.group(1, 2) - return attr, None |
