diff options
| author | yum <yum.food.vr@gmail.com> | 2023-01-01 21:05:27 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-01-01 21:44:45 -0800 |
| commit | e25bdba3a3a53b09be5269d8b065c13b73ab55c3 (patch) | |
| tree | 1d1dc1d94cde92c2f4f8ce86017395054787515d /Python/Dependencies/future-0.18.2/src/future/backports/email | |
| parent | 0d408cc812a094a708edbe4baf536e928731cfc3 (diff) | |
Embed git in package
package.ps1 fetches PortableGit and embeds it in the package. This
eliminates all but one runtime dependency (MSVC++ Redistributable).
* Move Python into a new FOSS folder.
Diffstat (limited to 'Python/Dependencies/future-0.18.2/src/future/backports/email')
28 files changed, 0 insertions, 9461 deletions
diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/__init__.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/__init__.py deleted file mode 100644 index f9523bc..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/__init__.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -""" -Backport of the Python 3.3 email package for Python-Future. - -A package for parsing, handling, and generating email messages. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -# Install the surrogate escape handler here because this is used by many -# modules in the email package. -from future.utils import surrogateescape -surrogateescape.register_surrogateescape() -# (Should this be done globally by ``future``?) - - -__version__ = '5.1.0' - -__all__ = [ - 'base64mime', - 'charset', - 'encoders', - 'errors', - 'feedparser', - 'generator', - 'header', - 'iterators', - 'message', - 'message_from_file', - 'message_from_binary_file', - 'message_from_string', - 'message_from_bytes', - 'mime', - 'parser', - 'quoprimime', - 'utils', - ] - - - -# Some convenience routines. Don't import Parser and Message as side-effects -# of importing email since those cascadingly import most of the rest of the -# email package. -def message_from_string(s, *args, **kws): - """Parse a string into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import Parser - return Parser(*args, **kws).parsestr(s) - -def message_from_bytes(s, *args, **kws): - """Parse a bytes string into a Message object model. - - Optional _class and strict are passed to the Parser constructor. 
- """ - from future.backports.email.parser import BytesParser - return BytesParser(*args, **kws).parsebytes(s) - -def message_from_file(fp, *args, **kws): - """Read a file and parse its contents into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import Parser - return Parser(*args, **kws).parse(fp) - -def message_from_binary_file(fp, *args, **kws): - """Read a binary file and parse its contents into a Message object model. - - Optional _class and strict are passed to the Parser constructor. - """ - from future.backports.email.parser import BytesParser - return BytesParser(*args, **kws).parse(fp) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/_encoded_words.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/_encoded_words.py deleted file mode 100644 index 7c4a529..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/_encoded_words.py +++ /dev/null @@ -1,232 +0,0 @@ -""" Routines for manipulating RFC2047 encoded words. - -This is currently a package-private API, but will be considered for promotion -to a public API if there is demand. - -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import bytes -from future.builtins import chr -from future.builtins import int -from future.builtins import str - -# An ecoded word looks like this: -# -# =?charset[*lang]?cte?encoded_string?= -# -# for more information about charset see the charset module. Here it is one -# of the preferred MIME charset names (hopefully; you never know when parsing). -# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In -# theory other letters could be used for other encodings, but in practice this -# (almost?) never happens. There could be a public API for adding entries -# to the CTE tables, but YAGNI for now. 
'q' is Quoted Printable, 'b' is -# Base64. The meaning of encoded_string should be obvious. 'lang' is optional -# as indicated by the brackets (they are not part of the syntax) but is almost -# never encountered in practice. -# -# The general interface for a CTE decoder is that it takes the encoded_string -# as its argument, and returns a tuple (cte_decoded_string, defects). The -# cte_decoded_string is the original binary that was encoded using the -# specified cte. 'defects' is a list of MessageDefect instances indicating any -# problems encountered during conversion. 'charset' and 'lang' are the -# corresponding strings extracted from the EW, case preserved. -# -# The general interface for a CTE encoder is that it takes a binary sequence -# as input and returns the cte_encoded_string, which is an ascii-only string. -# -# Each decoder must also supply a length function that takes the binary -# sequence as its argument and returns the length of the resulting encoded -# string. -# -# The main API functions for the module are decode, which calls the decoder -# referenced by the cte specifier, and encode, which adds the appropriate -# RFC 2047 "chrome" to the encoded string, and can optionally automatically -# select the shortest possible encoding. See their docstrings below for -# details. - -import re -import base64 -import binascii -import functools -from string import ascii_letters, digits -from future.backports.email import errors - -__all__ = ['decode_q', - 'encode_q', - 'decode_b', - 'encode_b', - 'len_q', - 'len_b', - 'decode', - 'encode', - ] - -# -# Quoted Printable -# - -# regex based decoder. 
-_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) - -def decode_q(encoded): - encoded = bytes(encoded.replace(b'_', b' ')) - return _q_byte_subber(encoded), [] - - -# dict mapping bytes to their encoded form -class _QByteMap(dict): - - safe = bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')) - - def __missing__(self, key): - if key in self.safe: - self[key] = chr(key) - else: - self[key] = "={:02X}".format(key) - return self[key] - -_q_byte_map = _QByteMap() - -# In headers spaces are mapped to '_'. -_q_byte_map[ord(' ')] = '_' - -def encode_q(bstring): - return str(''.join(_q_byte_map[x] for x in bytes(bstring))) - -def len_q(bstring): - return sum(len(_q_byte_map[x]) for x in bytes(bstring)) - - -# -# Base64 -# - -def decode_b(encoded): - defects = [] - pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded - try: - # The validate kwarg to b64decode is not supported in Py2.x - if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', padded_encoded): - raise binascii.Error('Non-base64 digit found') - return base64.b64decode(padded_encoded), defects - except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] - # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: - try: - return base64.b64decode(encoded+b'='*i), defects - except (binascii.Error, TypeError): # Py2 raises a TypeError - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. 
- raise AssertionError("unexpected binascii.Error") - -def encode_b(bstring): - return base64.b64encode(bstring).decode('ascii') - -def len_b(bstring): - groups_of_3, leftover = divmod(len(bstring), 3) - # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. - return groups_of_3 * 4 + (4 if leftover else 0) - - -_cte_decoders = { - 'q': decode_q, - 'b': decode_b, - } - -def decode(ew): - """Decode encoded word and return (string, charset, lang, defects) tuple. - - An RFC 2047/2243 encoded word has the form: - - =?charset*lang?cte?encoded_string?= - - where '*lang' may be omitted but the other parts may not be. - - This function expects exactly such a string (that is, it does not check the - syntax and may raise errors if the string is not well formed), and returns - the encoded_string decoded first from its Content Transfer Encoding and - then from the resulting bytes into unicode using the specified charset. If - the cte-decoded string does not successfully decode using the specified - character set, a defect is added to the defects list and the unknown octets - are replaced by the unicode 'unknown' character \uFDFF. - - The specified charset and language are returned. The default for language, - which is rarely if ever encountered, is the empty string. - - """ - _, charset, cte, cte_string, _ = str(ew).split('?') - charset, _, lang = charset.partition('*') - cte = cte.lower() - # Recover the original bytes and do CTE decoding. - bstring = cte_string.encode('ascii', 'surrogateescape') - bstring, defects = _cte_decoders[cte](bstring) - # Turn the CTE decoded bytes into unicode. 
- try: - string = bstring.decode(charset) - except UnicodeError: - defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) - string = bstring.decode(charset, 'surrogateescape') - except LookupError: - string = bstring.decode('ascii', 'surrogateescape') - if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) - return string, charset, lang, defects - - -_cte_encoders = { - 'q': encode_q, - 'b': encode_b, - } - -_cte_encode_length = { - 'q': len_q, - 'b': len_b, - } - -def encode(string, charset='utf-8', encoding=None, lang=''): - """Encode string using the CTE encoding that produces the shorter result. - - Produces an RFC 2047/2243 encoded word of the form: - - =?charset*lang?cte?encoded_string?= - - where '*lang' is omitted unless the 'lang' parameter is given a value. - Optional argument charset (defaults to utf-8) specifies the charset to use - to encode the string to binary before CTE encoding it. Optional argument - 'encoding' is the cte specifier for the encoding that should be used ('q' - or 'b'); if it is None (the default) the encoding which produces the - shortest encoded sequence is used, except that 'q' is preferred if it is up - to five characters longer. Optional argument 'lang' (default '') gives the - RFC 2243 language string to specify in the encoded word. - - """ - string = str(string) - if charset == 'unknown-8bit': - bstring = string.encode('ascii', 'surrogateescape') - else: - bstring = string.encode(charset) - if encoding is None: - qlen = _cte_encode_length['q'](bstring) - blen = _cte_encode_length['b'](bstring) - # Bias toward q. 5 is arbitrary. 
- encoding = 'q' if qlen - blen < 5 else 'b' - encoded = _cte_encoders[encoding](bstring) - if lang: - lang = '*' + lang - return "=?{0}{1}?{2}?{3}?=".format(charset, lang, encoding, encoded) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/_header_value_parser.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/_header_value_parser.py deleted file mode 100644 index 43957ed..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/_header_value_parser.py +++ /dev/null @@ -1,2965 +0,0 @@ -"""Header value parser implementing various email-related RFC parsing rules. - -The parsing methods defined in this module implement various email related -parsing rules. Principal among them is RFC 5322, which is the followon -to RFC 2822 and primarily a clarification of the former. It also implements -RFC 2047 encoded word decoding. - -RFC 5322 goes to considerable trouble to maintain backward compatibility with -RFC 822 in the parse phase, while cleaning up the structure on the generation -phase. This parser supports correct RFC 5322 generation by tagging white space -as folding white space only when folding is allowed in the non-obsolete rule -sets. Actually, the parser is even more generous when accepting input than RFC -5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages. -Where possible deviations from the standard are annotated on the 'defects' -attribute of tokens that deviate. - -The general structure of the parser follows RFC 5322, and uses its terminology -where there is a direct correspondence. Where the implementation requires a -somewhat different structure than that used by the formal grammar, new terms -that mimic the closest existing terms are used. Thus, it really helps to have -a copy of RFC 5322 handy when studying this code. - -Input to the parser is a string that has already been unfolded according to -RFC 5322 rules. 
According to the RFC this unfolding is the very first step, and -this parser leaves the unfolding step to a higher level message parser, which -will have already detected the line breaks that need unfolding while -determining the beginning and end of each header. - -The output of the parser is a TokenList object, which is a list subclass. A -TokenList is a recursive data structure. The terminal nodes of the structure -are Terminal objects, which are subclasses of str. These do not correspond -directly to terminal objects in the formal grammar, but are instead more -practical higher level combinations of true terminals. - -All TokenList and Terminal objects have a 'value' attribute, which produces the -semantically meaningful value of that part of the parse subtree. The value of -all whitespace tokens (no matter how many sub-tokens they may contain) is a -single space, as per the RFC rules. This includes 'CFWS', which is herein -included in the general class of whitespace tokens. There is one exception to -the rule that whitespace tokens are collapsed into single spaces in values: in -the value of a 'bare-quoted-string' (a quoted-string with no leading or -trailing whitespace), any whitespace that appeared between the quotation marks -is preserved in the returned value. Note that in all Terminal strings quoted -pairs are turned into their unquoted values. - -All TokenList and Terminal objects also have a string value, which attempts to -be a "canonical" representation of the RFC-compliant form of the substring that -produced the parsed subtree, including minimal use of quoted pair quoting. -Whitespace runs are not collapsed. - -Comment tokens also have a 'content' attribute providing the string found -between the parens (including any nested comments) with whitespace preserved. - -All TokenList and Terminal objects have a 'defects' attribute which is a -possibly empty list all of the defects found while creating the token. 
Defects -may appear on any token in the tree, and a composite list of all defects in the -subtree is available through the 'all_defects' attribute of any node. (For -Terminal notes x.defects == x.all_defects.) - -Each object in a parse tree is called a 'token', and each has a 'token_type' -attribute that gives the name from the RFC 5322 grammar that it represents. -Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that -may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters. -It is returned in place of lists of (ctext/quoted-pair) and -(qtext/quoted-pair). - -XXX: provide complete list of token types. -""" -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import int, range, str, super, list - -import re -from collections import namedtuple, OrderedDict - -from future.backports.urllib.parse import (unquote, unquote_to_bytes) -from future.backports.email import _encoded_words as _ew -from future.backports.email import errors -from future.backports.email import utils - -# -# Useful constants and functions -# - -WSP = set(' \t') -CFWS_LEADER = WSP | set('(') -SPECIALS = set(r'()<>@,:;.\"[]') -ATOM_ENDS = SPECIALS | WSP -DOT_ATOM_ENDS = ATOM_ENDS - set('.') -# '.', '"', and '(' do not end phrases in order to support obs-phrase -PHRASE_ENDS = SPECIALS - set('."(') -TSPECIALS = (SPECIALS | set('/?=')) - set('.') -TOKEN_ENDS = TSPECIALS | WSP -ASPECIALS = TSPECIALS | set("*'%") -ATTRIBUTE_ENDS = ASPECIALS | WSP -EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') - -def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' - -# -# Accumulator for header folding -# - -class _Folded(object): - - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - 
self.current = list() # uses l.clear() - - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - - def finalize(self): - if self.current: - self.newline() - - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False - -# -# TokenList and its subclasses -# - -class TokenList(list): - - token_type = None - - def __init__(self, *args, **kw): - super(TokenList, self).__init__(*args, **kw) - self.defects = [] - - def 
__str__(self): - return ''.join(str(x) for x in self) - - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, - super(TokenList, self).__repr__()) - - @property - def value(self): - return ''.join(x.value for x in self if x.value) - - @property - def all_defects(self): - return sum((x.all_defects for x in self), self.defects) - - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. - # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. _fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. 
A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = list() - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - - def startswith_fws(self): - return self[0].startswith_fws() - - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() - - @property - def comments(self): - comments = [] - for token in self: - comments.extend(token.comments) - return comments - - def fold(self, **_3to2kwargs): - # max_line_length 0/None means no limit, ie: infinitely long. - policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. 
- res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - # Peel off the leading whitespace and make it sticky, to - # avoid infinite recursion. - folded.stickyspace = str(part.pop(0)) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - - def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) - - def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) - - def _pp(self, indent=''): - yield '{}{}/{}('.format( - indent, - self.__class__.__name__, - self.token_type) - for token in self: - if not hasattr(token, '_pp'): - yield (indent + ' !! 
invalid element in token ' - 'list: {!r}'.format(token)) - else: - for line in token._pp(indent+' '): - yield line - if self.defects: - extra = ' Defects: {}'.format(self.defects) - else: - extra = '' - yield '{}){}'.format(indent, extra) - - -class WhiteSpaceTokenList(TokenList): - - @property - def value(self): - return ' ' - - @property - def comments(self): - return [x.content for x in self if x.token_type=='comment'] - - -class UnstructuredTokenList(TokenList): - - token_type = 'unstructured' - - def _fold(self, folded): - if any(x.token_type=='encoded-word' for x in self): - return self._fold_encoded(folded) - # Here we can have either a pure ASCII string that may or may not - # have surrogateescape encoded bytes, or a unicode string. - last_ew = None - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. 
- ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part.fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word()) - return ''.join(res) - - -class Phrase(TokenList): - - token_type = 'phrase' - - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. - last_ew = None - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode('us-ascii') - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. 
But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. 
- last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - -class Word(TokenList): - - token_type = 'word' - - -class CFWSList(WhiteSpaceTokenList): - - token_type = 'cfws' - - def has_leading_comment(self): - return bool(self.comments) - - -class Atom(TokenList): - - token_type = 'atom' - - -class Token(TokenList): - - token_type = 'token' - - -class EncodedWord(TokenList): - - token_type = 'encoded-word' - cte = None - charset = None - lang = None - - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - - -class QuotedString(TokenList): - - token_type = 'quoted-string' - - @property - def content(self): - for x in self: - if x.token_type == 'bare-quoted-string': - return x.value - - @property - def quoted_value(self): - res = [] - for x in self: - if x.token_type == 'bare-quoted-string': - res.append(str(x)) - else: - res.append(x.value) - return ''.join(res) - - @property - def stripped_value(self): - for token in self: - if token.token_type == 'bare-quoted-string': - return token.value - - -class BareQuotedString(QuotedString): - - token_type = 'bare-quoted-string' - - def __str__(self): - 
return quote_string(''.join(str(x) for x in self)) - - @property - def value(self): - return ''.join(str(x) for x in self) - - -class Comment(WhiteSpaceTokenList): - - token_type = 'comment' - - def __str__(self): - return ''.join(sum([ - ["("], - [self.quote(x) for x in self], - [")"], - ], [])) - - def quote(self, value): - if value.token_type == 'comment': - return str(value) - return str(value).replace('\\', '\\\\').replace( - '(', '\(').replace( - ')', '\)') - - @property - def content(self): - return ''.join(str(x) for x in self) - - @property - def comments(self): - return [self.content] - -class AddressList(TokenList): - - token_type = 'address-list' - - @property - def addresses(self): - return [x for x in self if x.token_type=='address'] - - @property - def mailboxes(self): - return sum((x.mailboxes - for x in self if x.token_type=='address'), []) - - @property - def all_mailboxes(self): - return sum((x.all_mailboxes - for x in self if x.token_type=='address'), []) - - -class Address(TokenList): - - token_type = 'address' - - @property - def display_name(self): - if self[0].token_type == 'group': - return self[0].display_name - - @property - def mailboxes(self): - if self[0].token_type == 'mailbox': - return [self[0]] - elif self[0].token_type == 'invalid-mailbox': - return [] - return self[0].mailboxes - - @property - def all_mailboxes(self): - if self[0].token_type == 'mailbox': - return [self[0]] - elif self[0].token_type == 'invalid-mailbox': - return [self[0]] - return self[0].all_mailboxes - -class MailboxList(TokenList): - - token_type = 'mailbox-list' - - @property - def mailboxes(self): - return [x for x in self if x.token_type=='mailbox'] - - @property - def all_mailboxes(self): - return [x for x in self - if x.token_type in ('mailbox', 'invalid-mailbox')] - - -class GroupList(TokenList): - - token_type = 'group-list' - - @property - def mailboxes(self): - if not self or self[0].token_type != 'mailbox-list': - return [] - return 
self[0].mailboxes - - @property - def all_mailboxes(self): - if not self or self[0].token_type != 'mailbox-list': - return [] - return self[0].all_mailboxes - - -class Group(TokenList): - - token_type = "group" - - @property - def mailboxes(self): - if self[2].token_type != 'group-list': - return [] - return self[2].mailboxes - - @property - def all_mailboxes(self): - if self[2].token_type != 'group-list': - return [] - return self[2].all_mailboxes - - @property - def display_name(self): - return self[0].display_name - - -class NameAddr(TokenList): - - token_type = 'name-addr' - - @property - def display_name(self): - if len(self) == 1: - return None - return self[0].display_name - - @property - def local_part(self): - return self[-1].local_part - - @property - def domain(self): - return self[-1].domain - - @property - def route(self): - return self[-1].route - - @property - def addr_spec(self): - return self[-1].addr_spec - - -class AngleAddr(TokenList): - - token_type = 'angle-addr' - - @property - def local_part(self): - for x in self: - if x.token_type == 'addr-spec': - return x.local_part - - @property - def domain(self): - for x in self: - if x.token_type == 'addr-spec': - return x.domain - - @property - def route(self): - for x in self: - if x.token_type == 'obs-route': - return x.domains - - @property - def addr_spec(self): - for x in self: - if x.token_type == 'addr-spec': - return x.addr_spec - else: - return '<>' - - -class ObsRoute(TokenList): - - token_type = 'obs-route' - - @property - def domains(self): - return [x.domain for x in self if x.token_type == 'domain'] - - -class Mailbox(TokenList): - - token_type = 'mailbox' - - @property - def display_name(self): - if self[0].token_type == 'name-addr': - return self[0].display_name - - @property - def local_part(self): - return self[0].local_part - - @property - def domain(self): - return self[0].domain - - @property - def route(self): - if self[0].token_type == 'name-addr': - return self[0].route - - 
class AddrSpec(TokenList):
    """Token list for an addr-spec: local-part, optionally '@' and a domain."""

    token_type = 'addr-spec'

    @property
    def local_part(self):
        """local_part of the first child token."""
        return self[0].local_part

    @property
    def domain(self):
        """domain of the last child, or None when fewer than three children."""
        return self[-1].domain if len(self) >= 3 else None

    @property
    def value(self):
        """Joined value with whitespace around the middle token trimmed."""
        if len(self) >= 3:
            left = self[0].value.rstrip()
            right = self[2].value.lstrip()
            return left + self[1].value + right
        return self[0].value

    @property
    def addr_spec(self):
        """The address text, quoting the local part when it needs it."""
        local = self.local_part
        # Quote if any character of the local part is in DOT_ATOM_ENDS.
        if set(local) & DOT_ATOM_ENDS:
            local = quote_string(local)
        domain = self.domain
        if domain is None:
            return local
        return local + '@' + domain
class DomainLiteral(TokenList):
    """Token list for a bracketed domain-literal."""

    token_type = 'domain-literal'

    @property
    def domain(self):
        """The inherited value with all whitespace removed."""
        raw = super(DomainLiteral, self).value
        return ''.join(raw.split())

    @property
    def ip(self):
        """value of the first 'ptext' child, or None when there is none."""
        return next(
            (tok.value for tok in self if tok.token_type == 'ptext'), None)
class Value(TokenList):
    """Token list for a parameter value."""

    token_type = 'value'

    @property
    def stripped_value(self):
        """The value with leading CFWS skipped and quoting/encoding removed.

        When the first significant child is a quoted-string, attribute or
        extended-attribute, its own stripped_value is returned; otherwise
        this token's plain value is used.
        """
        first = self[0]
        significant = self[1] if first.token_type == 'cfws' else first
        delegates = ('quoted-string', 'attribute', 'extended-attribute')
        if not significant.token_type.endswith(delegates):
            return self.value
        return significant.stripped_value
class ParameterizedHeaderValue(TokenList):
    """Base for header values that may end with a mime-parameters list."""

    @property
    def params(self):
        """params of the last mime-parameters child, or {} if there is none."""
        holder = next(
            (tok for tok in reversed(self)
             if tok.token_type == 'mime-parameters'),
            None)
        if holder is None:
            return {}
        return holder.params

    @property
    def parts(self):
        """Foldable parts, with a trailing parameter list flattened in."""
        if not self or self[-1].token_type != 'mime-parameters':
            return TokenList(self).parts
        # We don't want to start a new line if all of the params don't fit
        # after the value, so unwrap the parameter list.
        return TokenList(self[:-1] + self[-1])
class Terminal(str):
    """A leaf token: a string tagged with a token_type and a defects list."""

    def __new__(cls, value, token_type):
        instance = super(Terminal, cls).__new__(cls, value)
        instance.token_type = token_type
        instance.defects = []
        return instance

    def __repr__(self):
        text_repr = super(Terminal, self).__repr__()
        return "{}({})".format(self.__class__.__name__, text_repr)

    @property
    def all_defects(self):
        """A fresh copy of this terminal's defects list."""
        return list(self.defects)

    def _pp(self, indent=''):
        # One-line pretty-print; defects are shown only when present.
        if self.defects:
            defect_suffix = ' {}'.format(self.defects)
        else:
            defect_suffix = ''
        line = "{}{}/{}({}){}".format(
            indent,
            self.__class__.__name__,
            self.token_type,
            super(Terminal, self).__repr__(),
            defect_suffix,
            )
        return [line]

    def cte_encode(self, charset, policy):
        """Return the text unchanged if ASCII, else as an encoded word."""
        text = str(self)
        try:
            text.encode('us-ascii')
        except UnicodeEncodeError:
            return _ew.encode(text, charset)
        else:
            return text

    def pop_trailing_ws(self):
        # This terminates the recursion.
        return None

    def pop_leading_fws(self):
        # This terminates the recursion.
        return None

    @property
    def comments(self):
        """Terminals carry no comments."""
        return []

    def has_leading_comment(self):
        return False

    def __getnewargs__(self):
        # Arguments needed to rebuild the instance through __new__.
        return (str(self), self.token_type)
def _char_class_escape(chars):
    """Join *chars* into text that is safe inside a regex ``[...]`` class.

    Backslashes and closing brackets are backslash-escaped so they are
    taken literally.  The replacement is spelled '\\]' because '\]' is an
    invalid string escape that only works by accident and triggers a
    warning on modern interpreters.
    """
    return ''.join(chars).replace('\\', '\\\\').replace(']', '\\]')


# Split on the first run of whitespace characters (the run is kept).
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
# Each matcher consumes the longest prefix containing none of the listed
# terminating characters.
_non_atom_end_matcher = re.compile(
    r"[^{}]+".format(_char_class_escape(ATOM_ENDS))).match
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
_non_token_end_matcher = re.compile(
    r"[^{}]+".format(_char_class_escape(TOKEN_ENDS))).match
_non_attribute_end_matcher = re.compile(
    r"[^{}]+".format(_char_class_escape(ATTRIBUTE_ENDS))).match
_non_extended_attribute_end_matcher = re.compile(
    r"[^{}]+".format(_char_class_escape(EXTENDED_ATTRIBUTE_ENDS))).match
def _decode_ew_run(value):
    """Decode a leading run of RFC 2047 encoded words.

    _decode_ew_run(value) -> (text, value, defects)

    Whitespace-separated tokens that start with '=?' and end with '?=' are
    decoded one after another and their text concatenated; the whitespace
    between two encoded words is dropped.  Scanning stops at the first
    token that is not shaped like an encoded word.  The second element of
    the result is what remains unparsed (including the whitespace that
    preceded the stopping token), the third is the list of defects
    reported while decoding.  The input must not start with whitespace.
    """
    decoded = []
    defects = []
    pending_ws = ''
    while value:
        pieces = _wsp_splitter(value, 1)
        if len(pieces) == 3:
            word, gap, value = pieces
        else:
            # No whitespace left: the whole remainder is one token.
            word, gap, value = value, '', ''
        looks_encoded = word.startswith('=?') and word.endswith('?=')
        if not looks_encoded:
            return ''.join(decoded), pending_ws + word + gap + value, defects
        text, charset, lang, word_defects = _ew.decode(word)
        decoded.append(text)
        defects.extend(word_defects)
        pending_ws = gap
    return ''.join(decoded), pending_ws, defects
def get_unstructured(value):
    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR

       obs-NO-WS-CTL is control characters except WSP/CR/LF.

    The value is parsed into whitespace tokens, encoded-word tokens and
    'vtext' terminals (runs of non-whitespace).  An encoded word that is not
    preceded by whitespace gets an InvalidHeaderDefect; whitespace that sits
    between two encoded words is replaced by an EWWhiteSpaceTerminal.

    Because an 'unstructured' value must by definition constitute the entire
    value, no remainder is returned, only the parsed token list.

    """
    # XXX: but what about bare CR and LF?  They might signal the start or
    # end of an encoded word.  YAGNI for now, since our current parsers
    # will never send us strings with bare CR or LF.

    result = UnstructuredTokenList()
    while value:
        if value[0] in WSP:
            ws_token, value = get_fws(value)
            result.append(ws_token)
            continue
        if value.startswith('=?'):
            try:
                ew_token, value = get_encoded_word(value)
            except errors.HeaderParseError:
                # Not a real encoded word; fall through and treat as vtext.
                pass
            else:
                if len(result) > 0 and result[-1].token_type != 'fws':
                    result.defects.append(errors.InvalidHeaderDefect(
                        "missing whitespace before encoded word"))
                elif (len(result) > 1 and
                        result[-2].token_type == 'encoded-word'):
                    # Whitespace between two encoded words is not content.
                    result[-1] = EWWhiteSpaceTerminal(result[-1], 'fws')
                result.append(ew_token)
                continue
        pieces = list(_wsp_splitter(value, 1))
        vtext = ValueTerminal(pieces[0], 'vtext')
        _validate_xtext(vtext)
        result.append(vtext)
        value = ''.join(pieces[1:])
    return result
def get_bare_quoted_string(value):
    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE

    A quoted-string without the leading or trailing white space.  Its
    value is the text between the quote marks, with whitespace
    preserved and quoted pairs decoded.

    Returns a (BareQuotedString, remaining_value) pair.  If the input ends
    before the closing quote, an InvalidHeaderDefect is recorded on the
    token and the remainder is empty.

    Raises errors.HeaderParseError if *value* is empty or does not start
    with a double quote.
    """
    # Check for emptiness first so that empty input is reported as a parse
    # error instead of escaping as an IndexError.
    if not value or value[0] != '"':
        raise errors.HeaderParseError(
            "expected '\"' but found '{}'".format(value))
    bare_quoted_string = BareQuotedString()
    value = value[1:]
    while value and value[0] != '"':
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_qcontent(value)
        bare_quoted_string.append(token)
    if not value:
        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
            "end of header inside quoted string"))
        return bare_quoted_string, value
    # Drop the closing quote.
    return bare_quoted_string, value[1:]
def get_word(value):
    """word = atom / quoted-string

    Either atom or quoted-string may start with CFWS.  We have to peel off
    this CFWS first to determine which type of word to parse.  Afterward we
    splice the leading CFWS, if any, into the parsed sub-token.

    If neither an atom or a quoted-string is found before the next special,
    a HeaderParseError is raised.  This includes the case where the input is
    empty or consists of nothing but CFWS.

    The token returned is either an Atom or a QuotedString, as appropriate.
    This means the 'word' level of the formal grammar is not represented in
    the parse tree; this is because having that extra layer when
    manipulating the parse tree is more confusing than it is helpful.

    """
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    else:
        leader = None
    if not value:
        # Nothing left to parse: report it as a parse error so callers that
        # recover from HeaderParseError keep working, instead of crashing
        # with an IndexError on value[0] below.
        raise errors.HeaderParseError(
            "Expected 'atom' or 'quoted-string' but found nothing.")
    if value[0] == '"':
        token, value = get_quoted_string(value)
    elif value[0] in SPECIALS:
        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
                                      "but found '{}'".format(value))
    else:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    return token, value
def get_obs_local_part(value):
    """ obs-local-part = word *("." word)

    Parses words, dots, stray backslashes and CFWS up to the next character
    in PHRASE_ENDS.  Structural problems (repeated, leading or trailing
    dots, words not separated by a dot, a backslash outside a quoted
    string) are recorded as InvalidHeaderDefects; if any defect was
    recorded the returned token's token_type is changed to
    'invalid-obs-local-part'.

    Returns an (ObsLocalPart, remaining_value) pair.

    Raises errors.HeaderParseError if nothing at all could be parsed, or if
    a word fails to parse and the input is not at CFWS.
    """
    obs_local_part = ObsLocalPart()
    last_non_ws_was_dot = False
    while value and (value[0] == '\\' or value[0] not in PHRASE_ENDS):
        if value[0] == '.':
            if last_non_ws_was_dot:
                obs_local_part.defects.append(errors.InvalidHeaderDefect(
                    "invalid repeated '.'"))
            obs_local_part.append(DOT)
            last_non_ws_was_dot = True
            value = value[1:]
            continue
        elif value[0] == '\\':
            obs_local_part.append(ValueTerminal(value[0],
                                                'misplaced-special'))
            value = value[1:]
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "'\\' character outside of quoted-string/ccontent"))
            last_non_ws_was_dot = False
            continue
        if obs_local_part and obs_local_part[-1].token_type != 'dot':
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "missing '.' between words"))
        try:
            token, value = get_word(value)
            last_non_ws_was_dot = False
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
        obs_local_part.append(token)
    if not obs_local_part:
        # The loop consumed nothing; indexing [0] below would raise an
        # IndexError, so report it as a parse error instead.
        raise errors.HeaderParseError(
            "expected obs-local-part but found '{}'".format(value))
    # The length checks keep a lone CFWS token from indexing out of range.
    if (obs_local_part[0].token_type == 'dot' or
            obs_local_part[0].token_type == 'cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[1].token_type == 'dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid leading '.' in local part"))
    if (obs_local_part[-1].token_type == 'dot' or
            obs_local_part[-1].token_type == 'cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[-2].token_type == 'dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid trailing '.' in local part"))
    if obs_local_part.defects:
        obs_local_part.token_type = 'invalid-obs-local-part'
    return obs_local_part, value
def _check_for_early_dl_end(value, domain_literal):
    """Handle the input running out in the middle of a domain-literal.

    If *value* is non-empty nothing happens and False is returned.
    Otherwise an InvalidHeaderDefect is recorded on *domain_literal*, a
    synthetic closing ']' terminal is appended so the token is well formed,
    and True is returned.
    """
    if value:
        return False
    # The defect belongs in the defects list; appending it to the token
    # list itself would put a non-token object into the parse tree.
    domain_literal.defects.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True
def get_domain(value):
    """ domain = dot-atom / domain-literal / obs-domain
        obs-domain = atom *("." atom))

    Returns a (Domain, remaining_value) pair.  A domain written in the
    obsolete form (atoms separated by dots with CFWS in between) is accepted
    and flagged with an ObsoleteHeaderDefect.

    Raises errors.HeaderParseError if *value* is empty, consists only of
    CFWS, or does not start with something parseable as a domain.
    """
    domain = Domain()
    leader = None
    # Guard the index: for empty input we want the HeaderParseError below,
    # not an IndexError.
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        if domain[0].token_type == 'dot-atom':
            # Flatten the dot-atom so the obsolete parts sit beside it.
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value
- """ - obs_route = ObsRoute() - while value and (value[0]==',' or value[0] in CFWS_LEADER): - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - obs_route.append(token) - elif value[0] == ',': - obs_route.append(ListSeparator) - value = value[1:] - if not value or value[0] != '@': - raise errors.HeaderParseError( - "expected obs-route domain but found '{}'".format(value)) - obs_route.append(RouteComponentMarker) - token, value = get_domain(value[1:]) - obs_route.append(token) - while value and value[0]==',': - obs_route.append(ListSeparator) - value = value[1:] - if not value: - break - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - obs_route.append(token) - if value[0] == '@': - obs_route.append(RouteComponentMarker) - token, value = get_domain(value[1:]) - obs_route.append(token) - if not value: - raise errors.HeaderParseError("end of header while parsing obs-route") - if value[0] != ':': - raise errors.HeaderParseError( "expected ':' marking end of " - "obs-route but found '{}'".format(value)) - obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker')) - return obs_route, value[1:] - -def get_angle_addr(value): - """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr - obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS] - - """ - angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - angle_addr.append(token) - if not value or value[0] != '<': - raise errors.HeaderParseError( - "expected angle-addr but found '{}'".format(value)) - angle_addr.append(ValueTerminal('<', 'angle-addr-start')) - value = value[1:] - # Although it is not legal per RFC5322, SMTP uses '<>' in certain - # circumstances. 
- if value[0] == '>': - angle_addr.append(ValueTerminal('>', 'angle-addr-end')) - angle_addr.defects.append(errors.InvalidHeaderDefect( - "null addr-spec in angle-addr")) - value = value[1:] - return angle_addr, value - try: - token, value = get_addr_spec(value) - except errors.HeaderParseError: - try: - token, value = get_obs_route(value) - angle_addr.defects.append(errors.ObsoleteHeaderDefect( - "obsolete route specification in angle-addr")) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected addr-spec or obs-route but found '{}'".format(value)) - angle_addr.append(token) - token, value = get_addr_spec(value) - angle_addr.append(token) - if value and value[0] == '>': - value = value[1:] - else: - angle_addr.defects.append(errors.InvalidHeaderDefect( - "missing trailing '>' on angle-addr")) - angle_addr.append(ValueTerminal('>', 'angle-addr-end')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - angle_addr.append(token) - return angle_addr, value - -def get_display_name(value): - """ display-name = phrase - - Because this is simply a name-rule, we don't return a display-name - token containing a phrase, but rather a display-name token with - the content of the phrase. - - """ - display_name = DisplayName() - token, value = get_phrase(value) - display_name.extend(token[:]) - display_name.defects = token.defects[:] - return display_name, value - - -def get_name_addr(value): - """ name-addr = [display-name] angle-addr - - """ - name_addr = NameAddr() - # Both the optional display name and the angle-addr can start with cfws. 
- leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(leader)) - if value[0] != '<': - if value[0] in PHRASE_ENDS: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(value)) - token, value = get_display_name(value) - if not value: - raise errors.HeaderParseError( - "expected name-addr but found '{}'".format(token)) - if leader is not None: - token[0][:0] = [leader] - leader = None - name_addr.append(token) - token, value = get_angle_addr(value) - if leader is not None: - token[:0] = [leader] - name_addr.append(token) - return name_addr, value - -def get_mailbox(value): - """ mailbox = name-addr / addr-spec - - """ - # The only way to figure out if we are dealing with a name-addr or an - # addr-spec is to try parsing each one. - mailbox = Mailbox() - try: - token, value = get_name_addr(value) - except errors.HeaderParseError: - try: - token, value = get_addr_spec(value) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected mailbox but found '{}'".format(value)) - if any(isinstance(x, errors.InvalidHeaderDefect) - for x in token.all_defects): - mailbox.token_type = 'invalid-mailbox' - mailbox.append(token) - return mailbox, value - -def get_invalid_mailbox(value, endchars): - """ Read everything up to one of the chars in endchars. - - This is outside the formal grammar. The InvalidMailbox TokenList that is - returned acts like a Mailbox, but the data attributes are None. 
- - """ - invalid_mailbox = InvalidMailbox() - while value and value[0] not in endchars: - if value[0] in PHRASE_ENDS: - invalid_mailbox.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_mailbox.append(token) - return invalid_mailbox, value - -def get_mailbox_list(value): - """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) - - For this routine we go outside the formal grammar in order to improve error - handling. We recognize the end of the mailbox list only at the end of the - value or at a ';' (the group terminator). This is so that we can turn - invalid mailboxes into InvalidMailbox tokens and continue parsing any - remaining valid mailboxes. We also allow all mailbox entries to be null, - and this condition is handled appropriately at a higher level. - - """ - mailbox_list = MailboxList() - while value and value[0] != ';': - try: - token, value = get_mailbox(value) - mailbox_list.append(token) - except errors.HeaderParseError: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value or value[0] in ',;': - mailbox_list.append(leader) - mailbox_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in mailbox-list")) - else: - token, value = get_invalid_mailbox(value, ',;') - if leader is not None: - token[:0] = [leader] - mailbox_list.append(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - elif value[0] == ',': - mailbox_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in mailbox-list")) - else: - token, value = get_invalid_mailbox(value, ',;') - if leader is not None: - token[:0] = [leader] - mailbox_list.append(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - if value and value[0] not in ',;': - # Crap after mailbox; treat it as an 
invalid mailbox. - # The mailbox info will still be available. - mailbox = mailbox_list[-1] - mailbox.token_type = 'invalid-mailbox' - token, value = get_invalid_mailbox(value, ',;') - mailbox.extend(token) - mailbox_list.defects.append(errors.InvalidHeaderDefect( - "invalid mailbox in mailbox-list")) - if value and value[0] == ',': - mailbox_list.append(ListSeparator) - value = value[1:] - return mailbox_list, value - - -def get_group_list(value): - """ group-list = mailbox-list / CFWS / obs-group-list - obs-group-list = 1*([CFWS] ",") [CFWS] - - """ - group_list = GroupList() - if not value: - group_list.defects.append(errors.InvalidHeaderDefect( - "end of header before group-list")) - return group_list, value - leader = None - if value and value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - # This should never happen in email parsing, since CFWS-only is a - # legal alternative to group-list in a group, which is the only - # place group-list appears. - group_list.defects.append(errors.InvalidHeaderDefect( - "end of header in group-list")) - group_list.append(leader) - return group_list, value - if value[0] == ';': - group_list.append(leader) - return group_list, value - token, value = get_mailbox_list(value) - if len(token.all_mailboxes)==0: - if leader is not None: - group_list.append(leader) - group_list.extend(token) - group_list.defects.append(errors.ObsoleteHeaderDefect( - "group-list with empty entries")) - return group_list, value - if leader is not None: - token[:0] = [leader] - group_list.append(token) - return group_list, value - -def get_group(value): - """ group = display-name ":" [group-list] ";" [CFWS] - - """ - group = Group() - token, value = get_display_name(value) - if not value or value[0] != ':': - raise errors.HeaderParseError("expected ':' at end of group " - "display name but found '{}'".format(value)) - group.append(token) - group.append(ValueTerminal(':', 'group-display-name-terminator')) - value = value[1:] - if 
value and value[0] == ';': - group.append(ValueTerminal(';', 'group-terminator')) - return group, value[1:] - token, value = get_group_list(value) - group.append(token) - if not value: - group.defects.append(errors.InvalidHeaderDefect( - "end of header in group")) - if value[0] != ';': - raise errors.HeaderParseError( - "expected ';' at end of group but found {}".format(value)) - group.append(ValueTerminal(';', 'group-terminator')) - value = value[1:] - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - group.append(token) - return group, value - -def get_address(value): - """ address = mailbox / group - - Note that counter-intuitively, an address can be either a single address or - a list of addresses (a group). This is why the returned Address object has - a 'mailboxes' attribute which treats a single address as a list of length - one. When you need to differentiate between to two cases, extract the single - element, which is either a mailbox or a group token. - - """ - # The formal grammar isn't very helpful when parsing an address. mailbox - # and group, especially when allowing for obsolete forms, start off very - # similarly. It is only when you reach one of @, <, or : that you know - # what you've got. So, we try each one in turn, starting with the more - # likely of the two. We could perhaps make this more efficient by looking - # for a phrase and then branching based on the next character, but that - # would be a premature optimization. 
- address = Address() - try: - token, value = get_group(value) - except errors.HeaderParseError: - try: - token, value = get_mailbox(value) - except errors.HeaderParseError: - raise errors.HeaderParseError( - "expected address but found '{}'".format(value)) - address.append(token) - return address, value - -def get_address_list(value): - """ address_list = (address *("," address)) / obs-addr-list - obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) - - We depart from the formal grammar here by continuing to parse until the end - of the input, assuming the input to be entirely composed of an - address-list. This is always true in email parsing, and allows us - to skip invalid addresses to parse additional valid ones. - - """ - address_list = AddressList() - while value: - try: - token, value = get_address(value) - address_list.append(token) - except errors.HeaderParseError as err: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value or value[0] == ',': - address_list.append(leader) - address_list.defects.append(errors.ObsoleteHeaderDefect( - "address-list entry with no content")) - else: - token, value = get_invalid_mailbox(value, ',') - if leader is not None: - token[:0] = [leader] - address_list.append(Address([token])) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - elif value[0] == ',': - address_list.defects.append(errors.ObsoleteHeaderDefect( - "empty element in address-list")) - else: - token, value = get_invalid_mailbox(value, ',') - if leader is not None: - token[:0] = [leader] - address_list.append(Address([token])) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - if value and value[0] != ',': - # Crap after address; treat it as an invalid mailbox. - # The mailbox info will still be available. 
- mailbox = address_list[-1][0] - mailbox.token_type = 'invalid-mailbox' - token, value = get_invalid_mailbox(value, ',') - mailbox.extend(token) - address_list.defects.append(errors.InvalidHeaderDefect( - "invalid address in address-list")) - if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) - value = value[1:] - return address_list, value - -# -# XXX: As I begin to add additional header parsers, I'm realizing we probably -# have two level of parser routines: the get_XXX methods that get a token in -# the grammar, and parse_XXX methods that parse an entire field value. So -# get_address_list above should really be a parse_ method, as probably should -# be get_unstructured. -# - -def parse_mime_version(value): - """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS] - - """ - # The [CFWS] is implicit in the RFC 2045 BNF. - # XXX: This routine is a bit verbose, should factor out a get_int method. - mime_version = MIMEVersion() - if not value: - mime_version.defects.append(errors.HeaderMissingRequiredValue( - "Missing MIME version number (eg: 1.0)")) - return mime_version - if value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value: - mime_version.defects.append(errors.HeaderMissingRequiredValue( - "Expected MIME version number but found only CFWS")) - digits = '' - while value and value[0] != '.' 
and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] - if not digits.isdigit(): - mime_version.defects.append(errors.InvalidHeaderDefect( - "Expected MIME major version number but found {!r}".format(digits))) - mime_version.append(ValueTerminal(digits, 'xtext')) - else: - mime_version.major = int(digits) - mime_version.append(ValueTerminal(digits, 'digits')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value or value[0] != '.': - if mime_version.major is not None: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Incomplete MIME version; found only major number")) - if value: - mime_version.append(ValueTerminal(value, 'xtext')) - return mime_version - mime_version.append(ValueTerminal('.', 'version-separator')) - value = value[1:] - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if not value: - if mime_version.major is not None: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Incomplete MIME version; found only major number")) - return mime_version - digits = '' - while value and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] - if not digits.isdigit(): - mime_version.defects.append(errors.InvalidHeaderDefect( - "Expected MIME minor version number but found {!r}".format(digits))) - mime_version.append(ValueTerminal(digits, 'xtext')) - else: - mime_version.minor = int(digits) - mime_version.append(ValueTerminal(digits, 'digits')) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mime_version.append(token) - if value: - mime_version.defects.append(errors.InvalidHeaderDefect( - "Excess non-CFWS text after MIME version")) - mime_version.append(ValueTerminal(value, 'xtext')) - return mime_version - -def get_invalid_parameter(value): - """ Read everything up to the next ';'. - - This is outside the formal grammar. 
The InvalidParameter TokenList that is - returned acts like a Parameter, but the data attributes are None. - - """ - invalid_parameter = InvalidParameter() - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - invalid_parameter.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_parameter.append(token) - return invalid_parameter, value - -def get_ttext(value): - """ttext = <matches _ttext_matcher> - - We allow any non-TOKEN_ENDS in ttext, but add defects to the token's - defects list if we find non-ttext characters. We also register defects for - *any* non-printables even though the RFC doesn't exclude all of them, - because we follow the spirit of RFC 5322. - - """ - m = _non_token_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected ttext but found '{}'".format(value)) - ttext = m.group() - value = value[len(ttext):] - ttext = ValueTerminal(ttext, 'ttext') - _validate_xtext(ttext) - return ttext, value - -def get_token(value): - """token = [CFWS] 1*ttext [CFWS] - - The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or - tspecials. We also exclude tabs even though the RFC doesn't. - - The RFC implies the CFWS but is not explicit about it in the BNF. - - """ - mtoken = Token() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mtoken.append(token) - if value and value[0] in TOKEN_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_ttext(value) - mtoken.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - mtoken.append(token) - return mtoken, value - -def get_attrtext(value): - """attrtext = 1*(any non-ATTRIBUTE_ENDS character) - - We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the - token's defects list if we find non-attrtext characters. 
We also register - defects for *any* non-printables even though the RFC doesn't exclude all of - them, because we follow the spirit of RFC 5322. - - """ - m = _non_attribute_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected attrtext but found {!r}".format(value)) - attrtext = m.group() - value = value[len(attrtext):] - attrtext = ValueTerminal(attrtext, 'attrtext') - _validate_xtext(attrtext) - return attrtext, value - -def get_attribute(value): - """ [CFWS] 1*attrtext [CFWS] - - This version of the BNF makes the CFWS explicit, and as usual we use a - value terminal for the actual run of characters. The RFC equivalent of - attrtext is the token characters, with the subtraction of '*', "'", and '%'. - We include tab in the excluded set just as we do for token. - - """ - attribute = Attribute() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - if value and value[0] in ATTRIBUTE_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_attrtext(value) - attribute.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - return attribute, value - -def get_extended_attrtext(value): - """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%') - - This is a special parsing routine so that we get a value that - includes % escapes as a single string (which we decode as a single - string later). 
- - """ - m = _non_extended_attribute_end_matcher(value) - if not m: - raise errors.HeaderParseError( - "expected extended attrtext but found {!r}".format(value)) - attrtext = m.group() - value = value[len(attrtext):] - attrtext = ValueTerminal(attrtext, 'extended-attrtext') - _validate_xtext(attrtext) - return attrtext, value - -def get_extended_attribute(value): - """ [CFWS] 1*extended_attrtext [CFWS] - - This is like the non-extended version except we allow % characters, so that - we can pick up an encoded value as a single string. - - """ - # XXX: should we have an ExtendedAttribute TokenList? - attribute = Attribute() - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - if value and value[0] in EXTENDED_ATTRIBUTE_ENDS: - raise errors.HeaderParseError( - "expected token but found '{}'".format(value)) - token, value = get_extended_attrtext(value) - attribute.append(token) - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - attribute.append(token) - return attribute, value - -def get_section(value): - """ '*' digits - - The formal BNF is more complicated because leading 0s are not allowed. We - check for that and add a defect. We also assume no CFWS is allowed between - the '*' and the digits, though the RFC is not crystal clear on that. - The caller should already have dealt with leading CFWS. 
- - """ - section = Section() - if not value or value[0] != '*': - raise errors.HeaderParseError("Expected section but found {}".format( - value)) - section.append(ValueTerminal('*', 'section-marker')) - value = value[1:] - if not value or not value[0].isdigit(): - raise errors.HeaderParseError("Expected section number but " - "found {}".format(value)) - digits = '' - while value and value[0].isdigit(): - digits += value[0] - value = value[1:] - if digits[0] == '0' and digits != '0': - section.defects.append(errors.InvalidHeaderError("section number" - "has an invalid leading 0")) - section.number = int(digits) - section.append(ValueTerminal(digits, 'digits')) - return section, value - - -def get_value(value): - """ quoted-string / attribute - - """ - v = Value() - if not value: - raise errors.HeaderParseError("Expected value but found end of string") - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - raise errors.HeaderParseError("Expected value but found " - "only {}".format(leader)) - if value[0] == '"': - token, value = get_quoted_string(value) - else: - token, value = get_extended_attribute(value) - if leader is not None: - token[:0] = [leader] - v.append(token) - return v, value - -def get_parameter(value): - """ attribute [section] ["*"] [CFWS] "=" value - - The CFWS is implied by the RFC but not made explicit in the BNF. This - simplified form of the BNF from the RFC is made to conform with the RFC BNF - through some extra checks. We do it this way because it makes both error - recovery and working with the resulting parse tree easier. - """ - # It is possible CFWS would also be implicitly allowed between the section - # and the 'extended-attribute' marker (the '*') , but we've never seen that - # in the wild and we will therefore ignore the possibility. 
- param = Parameter() - token, value = get_attribute(value) - param.append(token) - if not value or value[0] == ';': - param.defects.append(errors.InvalidHeaderDefect("Parameter contains " - "name ({}) but no value".format(token))) - return param, value - if value[0] == '*': - try: - token, value = get_section(value) - param.sectioned = True - param.append(token) - except errors.HeaderParseError: - pass - if not value: - raise errors.HeaderParseError("Incomplete parameter") - if value[0] == '*': - param.append(ValueTerminal('*', 'extended-parameter-marker')) - value = value[1:] - param.extended = True - if value[0] != '=': - raise errors.HeaderParseError("Parameter not followed by '='") - param.append(ValueTerminal('=', 'parameter-separator')) - value = value[1:] - leader = None - if value and value[0] in CFWS_LEADER: - token, value = get_cfws(value) - param.append(token) - remainder = None - appendto = param - if param.extended and value and value[0] == '"': - # Now for some serious hackery to handle the common invalid case of - # double quotes around an extended value. We also accept (with defect) - # a value marked as encoded that isn't really. 
- qstring, remainder = get_quoted_string(value) - inner_value = qstring.stripped_value - semi_valid = False - if param.section_number == 0: - if inner_value and inner_value[0] == "'": - semi_valid = True - else: - token, rest = get_attrtext(inner_value) - if rest and rest[0] == "'": - semi_valid = True - else: - try: - token, rest = get_extended_attrtext(inner_value) - except: - pass - else: - if not rest: - semi_valid = True - if semi_valid: - param.defects.append(errors.InvalidHeaderDefect( - "Quoted string value for extended parameter is invalid")) - param.append(qstring) - for t in qstring: - if t.token_type == 'bare-quoted-string': - t[:] = [] - appendto = t - break - value = inner_value - else: - remainder = None - param.defects.append(errors.InvalidHeaderDefect( - "Parameter marked as extended but appears to have a " - "quoted string value that is non-encoded")) - if value and value[0] == "'": - token = None - else: - token, value = get_value(value) - if not param.extended or param.section_number > 0: - if not value or value[0] != "'": - appendto.append(token) - if remainder is not None: - assert not value, value - value = remainder - return param, value - param.defects.append(errors.InvalidHeaderDefect( - "Apparent initial-extended-value but attribute " - "was not marked as extended or was not initial section")) - if not value: - # Assume the charset/lang is missing and the token is the value. 
- param.defects.append(errors.InvalidHeaderDefect( - "Missing required charset/lang delimiters")) - appendto.append(token) - if remainder is None: - return param, value - else: - if token is not None: - for t in token: - if t.token_type == 'extended-attrtext': - break - t.token_type == 'attrtext' - appendto.append(t) - param.charset = t.value - if value[0] != "'": - raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " - "delimiter, but found {!r}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) - value = value[1:] - if value and value[0] != "'": - token, value = get_attrtext(value) - appendto.append(token) - param.lang = token.value - if not value or value[0] != "'": - raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " - "delimiter, but found {}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) - value = value[1:] - if remainder is not None: - # Treat the rest of value as bare quoted string content. - v = Value() - while value: - if value[0] in WSP: - token, value = get_fws(value) - else: - token, value = get_qcontent(value) - v.append(token) - token = v - else: - token, value = get_value(value) - appendto.append(token) - if remainder is not None: - assert not value, value - value = remainder - return param, value - -def parse_mime_parameters(value): - """ parameter *( ";" parameter ) - - That BNF is meant to indicate this routine should only be called after - finding and handling the leading ';'. There is no corresponding rule in - the formal RFC grammar, but it is more convenient for us for the set of - parameters to be treated as its own TokenList. - - This is 'parse' routine because it consumes the reminaing value, but it - would never be called to parse a full header. Instead it is called to - parse everything after the non-parameter value of a specific MIME header. 
- - """ - mime_parameters = MimeParameters() - while value: - try: - token, value = get_parameter(value) - mime_parameters.append(token) - except errors.HeaderParseError as err: - leader = None - if value[0] in CFWS_LEADER: - leader, value = get_cfws(value) - if not value: - mime_parameters.append(leader) - return mime_parameters - if value[0] == ';': - if leader is not None: - mime_parameters.append(leader) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "parameter entry with no content")) - else: - token, value = get_invalid_parameter(value) - if leader: - token[:0] = [leader] - mime_parameters.append(token) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "invalid parameter {!r}".format(token))) - if value and value[0] != ';': - # Junk after the otherwise valid parameter. Mark it as - # invalid, but it will have a value. - param = mime_parameters[-1] - param.token_type = 'invalid-parameter' - token, value = get_invalid_parameter(value) - param.extend(token) - mime_parameters.defects.append(errors.InvalidHeaderDefect( - "parameter with invalid trailing text {!r}".format(token))) - if value: - # Must be a ';' at this point. - mime_parameters.append(ValueTerminal(';', 'parameter-separator')) - value = value[1:] - return mime_parameters - -def _find_mime_parameters(tokenlist, value): - """Do our best to find the parameters in an invalid MIME header - - """ - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - tokenlist.append(ValueTerminal(value[0], 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - tokenlist.append(token) - if not value: - return - tokenlist.append(ValueTerminal(';', 'parameter-separator')) - tokenlist.append(parse_mime_parameters(value[1:])) - -def parse_content_type_header(value): - """ maintype "/" subtype *( ";" parameter ) - - The maintype and substype are tokens. Theoretically they could - be checked against the official IANA list + x-token, but we - don't do that. 
- """ - ctype = ContentType() - recover = False - if not value: - ctype.defects.append(errors.HeaderMissingRequiredValue( - "Missing content type specification")) - return ctype - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content maintype but found {!r}".format(value))) - _find_mime_parameters(ctype, value) - return ctype - ctype.append(token) - # XXX: If we really want to follow the formal grammer we should make - # mantype and subtype specialized TokenLists here. Probably not worth it. - if not value or value[0] != '/': - ctype.defects.append(errors.InvalidHeaderDefect( - "Invalid content type")) - if value: - _find_mime_parameters(ctype, value) - return ctype - ctype.maintype = token.value.strip().lower() - ctype.append(ValueTerminal('/', 'content-type-separator')) - value = value[1:] - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content subtype but found {!r}".format(value))) - _find_mime_parameters(ctype, value) - return ctype - ctype.append(token) - ctype.subtype = token.value.strip().lower() - if not value: - return ctype - if value[0] != ';': - ctype.defects.append(errors.InvalidHeaderDefect( - "Only parameters are valid after content type, but " - "found {!r}".format(value))) - # The RFC requires that a syntactically invalid content-type be treated - # as text/plain. Perhaps we should postel this, but we should probably - # only do that if we were checking the subtype value against IANA. 
- del ctype.maintype, ctype.subtype - _find_mime_parameters(ctype, value) - return ctype - ctype.append(ValueTerminal(';', 'parameter-separator')) - ctype.append(parse_mime_parameters(value[1:])) - return ctype - -def parse_content_disposition_header(value): - """ disposition-type *( ";" parameter ) - - """ - disp_header = ContentDisposition() - if not value: - disp_header.defects.append(errors.HeaderMissingRequiredValue( - "Missing content disposition")) - return disp_header - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content disposition but found {!r}".format(value))) - _find_mime_parameters(disp_header, value) - return disp_header - disp_header.append(token) - disp_header.content_disposition = token.value.strip().lower() - if not value: - return disp_header - if value[0] != ';': - disp_header.defects.append(errors.InvalidHeaderDefect( - "Only parameters are valid after content disposition, but " - "found {!r}".format(value))) - _find_mime_parameters(disp_header, value) - return disp_header - disp_header.append(ValueTerminal(';', 'parameter-separator')) - disp_header.append(parse_mime_parameters(value[1:])) - return disp_header - -def parse_content_transfer_encoding_header(value): - """ mechanism - - """ - # We should probably validate the values, since the list is fixed. 
- cte_header = ContentTransferEncoding() - if not value: - cte_header.defects.append(errors.HeaderMissingRequiredValue( - "Missing content transfer encoding")) - return cte_header - try: - token, value = get_token(value) - except errors.HeaderParseError: - ctype.defects.append(errors.InvalidHeaderDefect( - "Expected content trnasfer encoding but found {!r}".format(value))) - else: - cte_header.append(token) - cte_header.cte = token.value.strip().lower() - if not value: - return cte_header - while value: - cte_header.defects.append(errors.InvalidHeaderDefect( - "Extra text after content transfer encoding")) - if value[0] in PHRASE_ENDS: - cte_header.append(ValueTerminal(value[0], 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - cte_header.append(token) - return cte_header diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/_parseaddr.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/_parseaddr.py deleted file mode 100644 index 5b50cc6..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/_parseaddr.py +++ /dev/null @@ -1,546 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Contact: email-sig@python.org - -"""Email address parsing code. - -Lifted directly from rfc822.py. This should eventually be rewritten. 
-""" - -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import int - -__all__ = [ - 'mktime_tz', - 'parsedate', - 'parsedate_tz', - 'quote', - ] - -import time, calendar - -SPACE = ' ' -EMPTYSTRING = '' -COMMASPACE = ', ' - -# Parse a date field -_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', - 'aug', 'sep', 'oct', 'nov', 'dec', - 'january', 'february', 'march', 'april', 'may', 'june', 'july', - 'august', 'september', 'october', 'november', 'december'] - -_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] - -# The timezone table does not include the military time zones defined -# in RFC822, other than Z. According to RFC1123, the description in -# RFC822 gets the signs wrong, so we can't rely on any such time -# zones. RFC1123 recommends that numeric timezone indicators be used -# instead of timezone names. - -_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, - 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) - 'EST': -500, 'EDT': -400, # Eastern - 'CST': -600, 'CDT': -500, # Central - 'MST': -700, 'MDT': -600, # Mountain - 'PST': -800, 'PDT': -700 # Pacific - } - - -def parsedate_tz(data): - """Convert a date string to a time tuple. - - Accounts for military timezones. - """ - res = _parsedate_tz(data) - if not res: - return - if res[9] is None: - res[9] = 0 - return tuple(res) - -def _parsedate_tz(data): - """Convert date to extended time tuple. - - The last (additional) element is the time zone offset in seconds, except if - the timezone was specified as -0000. In that case the last element is - None. This indicates a UTC timestamp that explicitly declaims knowledge of - the source timezone, as opposed to a +0000 timestamp that indicates the - source timezone really was UTC. 
- - """ - if not data: - return - data = data.split() - # The FWS after the comma after the day-of-week is optional, so search and - # adjust for this. - if data[0].endswith(',') or data[0].lower() in _daynames: - # There's a dayname here. Skip it - del data[0] - else: - i = data[0].rfind(',') - if i >= 0: - data[0] = data[0][i+1:] - if len(data) == 3: # RFC 850 date, deprecated - stuff = data[0].split('-') - if len(stuff) == 3: - data = stuff + data[1:] - if len(data) == 4: - s = data[3] - i = s.find('+') - if i == -1: - i = s.find('-') - if i > 0: - data[3:] = [s[:i], s[i:]] - else: - data.append('') # Dummy tz - if len(data) < 5: - return None - data = data[:5] - [dd, mm, yy, tm, tz] = data - mm = mm.lower() - if mm not in _monthnames: - dd, mm = mm, dd.lower() - if mm not in _monthnames: - return None - mm = _monthnames.index(mm) + 1 - if mm > 12: - mm -= 12 - if dd[-1] == ',': - dd = dd[:-1] - i = yy.find(':') - if i > 0: - yy, tm = tm, yy - if yy[-1] == ',': - yy = yy[:-1] - if not yy[0].isdigit(): - yy, tz = tz, yy - if tm[-1] == ',': - tm = tm[:-1] - tm = tm.split(':') - if len(tm) == 2: - [thh, tmm] = tm - tss = '0' - elif len(tm) == 3: - [thh, tmm, tss] = tm - elif len(tm) == 1 and '.' in tm[0]: - # Some non-compliant MUAs use '.' to separate time elements. - tm = tm[0].split('.') - if len(tm) == 2: - [thh, tmm] = tm - tss = 0 - elif len(tm) == 3: - [thh, tmm, tss] = tm - else: - return None - try: - yy = int(yy) - dd = int(dd) - thh = int(thh) - tmm = int(tmm) - tss = int(tss) - except ValueError: - return None - # Check for a yy specified in two-digit format, then convert it to the - # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for - # the time module. - if yy < 100: - # The year is between 1969 and 1999 (inclusive). 
- if yy > 68: - yy += 1900 - # The year is between 2000 and 2068 (inclusive). - else: - yy += 2000 - tzoffset = None - tz = tz.upper() - if tz in _timezones: - tzoffset = _timezones[tz] - else: - try: - tzoffset = int(tz) - except ValueError: - pass - if tzoffset==0 and tz.startswith('-'): - tzoffset = None - # Convert a timezone offset into seconds ; -0500 -> -18000 - if tzoffset: - if tzoffset < 0: - tzsign = -1 - tzoffset = -tzoffset - else: - tzsign = 1 - tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) - # Daylight Saving Time flag is set to -1, since DST is unknown. - return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset] - - -def parsedate(data): - """Convert a time string to a time tuple.""" - t = parsedate_tz(data) - if isinstance(t, tuple): - return t[:9] - else: - return t - - -def mktime_tz(data): - """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.""" - if data[9] is None: - # No zone info, so localtime is better assumption than GMT - return time.mktime(data[:8] + (-1,)) - else: - t = calendar.timegm(data) - return t - data[9] - - -def quote(str): - """Prepare string to be used in a quoted string. - - Turns backslash and double quote characters into quoted pairs. These - are the only characters that need to be quoted inside a quoted string. - Does not add the surrounding double quotes. - """ - return str.replace('\\', '\\\\').replace('"', '\\"') - - -class AddrlistClass(object): - """Address parser class by Ben Escoto. - - To understand what this class does, it helps to have a copy of RFC 2822 in - front of you. - - Note: this class interface is deprecated and may be removed in the future. - Use email.utils.AddressList instead. - """ - - def __init__(self, field): - """Initialize a new instance. - - `field' is an unparsed address header field, containing - one or more addresses. 
- """ - self.specials = '()<>@,:;.\"[]' - self.pos = 0 - self.LWS = ' \t' - self.CR = '\r\n' - self.FWS = self.LWS + self.CR - self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. - self.phraseends = self.atomends.replace('.', '') - self.field = field - self.commentlist = [] - - def gotonext(self): - """Skip white space and extract comments.""" - wslist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS + '\n\r': - if self.field[self.pos] not in '\n\r': - wslist.append(self.field[self.pos]) - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - else: - break - return EMPTYSTRING.join(wslist) - - def getaddrlist(self): - """Parse all addresses. - - Returns a list containing all of the addresses. - """ - result = [] - while self.pos < len(self.field): - ad = self.getaddress() - if ad: - result += ad - else: - result.append(('', '')) - return result - - def getaddress(self): - """Parse the next address.""" - self.commentlist = [] - self.gotonext() - - oldpos = self.pos - oldcl = self.commentlist - plist = self.getphraselist() - - self.gotonext() - returnlist = [] - - if self.pos >= len(self.field): - # Bad email address technically, no domain. 
- if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - - elif self.field[self.pos] in '.@': - # email address is just an addrspec - # this isn't very efficient since we start over - self.pos = oldpos - self.commentlist = oldcl - addrspec = self.getaddrspec() - returnlist = [(SPACE.join(self.commentlist), addrspec)] - - elif self.field[self.pos] == ':': - # address is a group - returnlist = [] - - fieldlen = len(self.field) - self.pos += 1 - while self.pos < len(self.field): - self.gotonext() - if self.pos < fieldlen and self.field[self.pos] == ';': - self.pos += 1 - break - returnlist = returnlist + self.getaddress() - - elif self.field[self.pos] == '<': - # Address is a phrase then a route addr - routeaddr = self.getrouteaddr() - - if self.commentlist: - returnlist = [(SPACE.join(plist) + ' (' + - ' '.join(self.commentlist) + ')', routeaddr)] - else: - returnlist = [(SPACE.join(plist), routeaddr)] - - else: - if plist: - returnlist = [(SPACE.join(self.commentlist), plist[0])] - elif self.field[self.pos] in self.specials: - self.pos += 1 - - self.gotonext() - if self.pos < len(self.field) and self.field[self.pos] == ',': - self.pos += 1 - return returnlist - - def getrouteaddr(self): - """Parse a route address (Return-path value). - - This method just skips all the route stuff and returns the addrspec. 
- """ - if self.field[self.pos] != '<': - return - - expectroute = False - self.pos += 1 - self.gotonext() - adlist = '' - while self.pos < len(self.field): - if expectroute: - self.getdomain() - expectroute = False - elif self.field[self.pos] == '>': - self.pos += 1 - break - elif self.field[self.pos] == '@': - self.pos += 1 - expectroute = True - elif self.field[self.pos] == ':': - self.pos += 1 - else: - adlist = self.getaddrspec() - self.pos += 1 - break - self.gotonext() - - return adlist - - def getaddrspec(self): - """Parse an RFC 2822 addr-spec.""" - aslist = [] - - self.gotonext() - while self.pos < len(self.field): - preserve_ws = True - if self.field[self.pos] == '.': - if aslist and not aslist[-1].strip(): - aslist.pop() - aslist.append('.') - self.pos += 1 - preserve_ws = False - elif self.field[self.pos] == '"': - aslist.append('"%s"' % quote(self.getquote())) - elif self.field[self.pos] in self.atomends: - if aslist and not aslist[-1].strip(): - aslist.pop() - break - else: - aslist.append(self.getatom()) - ws = self.gotonext() - if preserve_ws and ws: - aslist.append(ws) - - if self.pos >= len(self.field) or self.field[self.pos] != '@': - return EMPTYSTRING.join(aslist) - - aslist.append('@') - self.pos += 1 - self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() - - def getdomain(self): - """Get the complete domain name from an address.""" - sdlist = [] - while self.pos < len(self.field): - if self.field[self.pos] in self.LWS: - self.pos += 1 - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] == '[': - sdlist.append(self.getdomainliteral()) - elif self.field[self.pos] == '.': - self.pos += 1 - sdlist.append('.') - elif self.field[self.pos] in self.atomends: - break - else: - sdlist.append(self.getatom()) - return EMPTYSTRING.join(sdlist) - - def getdelimited(self, beginchar, endchars, allowcomments=True): - """Parse a header fragment delimited by special characters. 
- - `beginchar' is the start character for the fragment. - If self is not looking at an instance of `beginchar' then - getdelimited returns the empty string. - - `endchars' is a sequence of allowable end-delimiting characters. - Parsing stops when one of these is encountered. - - If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed - within the parsed fragment. - """ - if self.field[self.pos] != beginchar: - return '' - - slist = [''] - quote = False - self.pos += 1 - while self.pos < len(self.field): - if quote: - slist.append(self.field[self.pos]) - quote = False - elif self.field[self.pos] in endchars: - self.pos += 1 - break - elif allowcomments and self.field[self.pos] == '(': - slist.append(self.getcomment()) - continue # have already advanced pos from getcomment - elif self.field[self.pos] == '\\': - quote = True - else: - slist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(slist) - - def getquote(self): - """Get a quote-delimited fragment from self's field.""" - return self.getdelimited('"', '"\r', False) - - def getcomment(self): - """Get a parenthesis-delimited fragment from self's field.""" - return self.getdelimited('(', ')\r', True) - - def getdomainliteral(self): - """Parse an RFC 2822 domain-literal.""" - return '[%s]' % self.getdelimited('[', ']\r', False) - - def getatom(self, atomends=None): - """Parse an RFC 2822 atom. - - Optional atomends specifies a different set of end token delimiters - (the default is to use self.atomends). This is used e.g. in - getphraselist() since phrase endings must not include the `.' (which - is legal in phrases).""" - atomlist = [''] - if atomends is None: - atomends = self.atomends - - while self.pos < len(self.field): - if self.field[self.pos] in atomends: - break - else: - atomlist.append(self.field[self.pos]) - self.pos += 1 - - return EMPTYSTRING.join(atomlist) - - def getphraselist(self): - """Parse a sequence of RFC 2822 phrases. 
- - A phrase is a sequence of words, which are in turn either RFC 2822 - atoms or quoted-strings. Phrases are canonicalized by squeezing all - runs of continuous whitespace into one space. - """ - plist = [] - - while self.pos < len(self.field): - if self.field[self.pos] in self.FWS: - self.pos += 1 - elif self.field[self.pos] == '"': - plist.append(self.getquote()) - elif self.field[self.pos] == '(': - self.commentlist.append(self.getcomment()) - elif self.field[self.pos] in self.phraseends: - break - else: - plist.append(self.getatom(self.phraseends)) - - return plist - -class AddressList(AddrlistClass): - """An AddressList encapsulates a list of parsed RFC 2822 addresses.""" - def __init__(self, field): - AddrlistClass.__init__(self, field) - if field: - self.addresslist = self.getaddrlist() - else: - self.addresslist = [] - - def __len__(self): - return len(self.addresslist) - - def __add__(self, other): - # Set union - newaddr = AddressList(None) - newaddr.addresslist = self.addresslist[:] - for x in other.addresslist: - if not x in self.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __iadd__(self, other): - # Set union, in-place - for x in other.addresslist: - if not x in self.addresslist: - self.addresslist.append(x) - return self - - def __sub__(self, other): - # Set difference - newaddr = AddressList(None) - for x in self.addresslist: - if not x in other.addresslist: - newaddr.addresslist.append(x) - return newaddr - - def __isub__(self, other): - # Set difference, in-place - for x in other.addresslist: - if x in self.addresslist: - self.addresslist.remove(x) - return self - - def __getitem__(self, index): - # Make indexing, slices, and 'in' work - return self.addresslist[index] diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/_policybase.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/_policybase.py deleted file mode 100644 index c66aea9..0000000 --- 
a/Python/Dependencies/future-0.18.2/src/future/backports/email/_policybase.py +++ /dev/null @@ -1,365 +0,0 @@ -"""Policy framework for the email package. - -Allows fine grained feature control of how the package parses and emits data. -""" -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import super -from future.builtins import str -from future.utils import with_metaclass - -import abc -from future.backports.email import header -from future.backports.email import charset as _charset -from future.backports.email.utils import _has_surrogates - -__all__ = [ - 'Policy', - 'Compat32', - 'compat32', - ] - - -class _PolicyBase(object): - - """Policy Object basic framework. - - This class is useless unless subclassed. A subclass should define - class attributes with defaults for any values that are to be - managed by the Policy object. The constructor will then allow - non-default values to be set for these attributes at instance - creation time. The instance will be callable, taking these same - attributes keyword arguments, and returning a new instance - identical to the called instance except for those values changed - by the keyword arguments. Instances may be added, yielding new - instances with any non-default values from the right hand - operand overriding those in the left hand operand. That is, - - A + B == A(<non-default values of B>) - - The repr of an instance can be used to reconstruct the object - if and only if the repr of the values can be used to reconstruct - those values. - - """ - - def __init__(self, **kw): - """Create new Policy, possibly overriding some defaults. - - See class docstring for a list of overridable attributes. 
- - """ - for name, value in kw.items(): - if hasattr(self, name): - super(_PolicyBase,self).__setattr__(name, value) - else: - raise TypeError( - "{!r} is an invalid keyword argument for {}".format( - name, self.__class__.__name__)) - - def __repr__(self): - args = [ "{}={!r}".format(name, value) - for name, value in self.__dict__.items() ] - return "{}({})".format(self.__class__.__name__, ', '.join(args)) - - def clone(self, **kw): - """Return a new instance with specified attributes changed. - - The new instance has the same attribute values as the current object, - except for the changes passed in as keyword arguments. - - """ - newpolicy = self.__class__.__new__(self.__class__) - for attr, value in self.__dict__.items(): - object.__setattr__(newpolicy, attr, value) - for attr, value in kw.items(): - if not hasattr(self, attr): - raise TypeError( - "{!r} is an invalid keyword argument for {}".format( - attr, self.__class__.__name__)) - object.__setattr__(newpolicy, attr, value) - return newpolicy - - def __setattr__(self, name, value): - if hasattr(self, name): - msg = "{!r} object attribute {!r} is read-only" - else: - msg = "{!r} object has no attribute {!r}" - raise AttributeError(msg.format(self.__class__.__name__, name)) - - def __add__(self, other): - """Non-default values from right operand override those from left. - - The object returned is a new instance of the subclass. 
- - """ - return self.clone(**other.__dict__) - - -def _append_doc(doc, added_doc): - doc = doc.rsplit('\n', 1)[0] - added_doc = added_doc.split('\n', 1)[1] - return doc + '\n' + added_doc - -def _extend_docstrings(cls): - if cls.__doc__ and cls.__doc__.startswith('+'): - cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__) - for name, attr in cls.__dict__.items(): - if attr.__doc__ and attr.__doc__.startswith('+'): - for c in (c for base in cls.__bases__ for c in base.mro()): - doc = getattr(getattr(c, name), '__doc__') - if doc: - attr.__doc__ = _append_doc(doc, attr.__doc__) - break - return cls - - -class Policy(with_metaclass(abc.ABCMeta, _PolicyBase)): - - r"""Controls for how messages are interpreted and formatted. - - Most of the classes and many of the methods in the email package accept - Policy objects as parameters. A Policy object contains a set of values and - functions that control how input is interpreted and how output is rendered. - For example, the parameter 'raise_on_defect' controls whether or not an RFC - violation results in an error being raised or not, while 'max_line_length' - controls the maximum length of output lines when a Message is serialized. - - Any valid attribute may be overridden when a Policy is created by passing - it as a keyword argument to the constructor. Policy objects are immutable, - but a new Policy object can be created with only certain values changed by - calling the Policy instance with keyword arguments. Policy objects can - also be added, producing a new Policy object in which the non-default - attributes set in the right hand operand overwrite those specified in the - left operand. - - Settable attributes: - - raise_on_defect -- If true, then defects should be raised as errors. - Default: False. - - linesep -- string containing the value to use as separation - between output lines. Default '\n'. 
- - cte_type -- Type of allowed content transfer encodings - - 7bit -- ASCII only - 8bit -- Content-Transfer-Encoding: 8bit is allowed - - Default: 8bit. Also controls the disposition of - (RFC invalid) binary data in headers; see the - documentation of the binary_fold method. - - max_line_length -- maximum length of lines, excluding 'linesep', - during serialization. None or 0 means no line - wrapping is done. Default is 78. - - """ - - raise_on_defect = False - linesep = '\n' - cte_type = '8bit' - max_line_length = 78 - - def handle_defect(self, obj, defect): - """Based on policy, either raise defect or call register_defect. - - handle_defect(obj, defect) - - defect should be a Defect subclass, but in any case must be an - Exception subclass. obj is the object on which the defect should be - registered if it is not raised. If the raise_on_defect is True, the - defect is raised as an error, otherwise the object and the defect are - passed to register_defect. - - This method is intended to be called by parsers that discover defects. - The email package parsers always call it with Defect instances. - - """ - if self.raise_on_defect: - raise defect - self.register_defect(obj, defect) - - def register_defect(self, obj, defect): - """Record 'defect' on 'obj'. - - Called by handle_defect if raise_on_defect is False. This method is - part of the Policy API so that Policy subclasses can implement custom - defect handling. The default implementation calls the append method of - the defects attribute of obj. The objects used by the email package by - default that get passed to this method will always have a defects - attribute with an append method. - - """ - obj.defects.append(defect) - - def header_max_count(self, name): - """Return the maximum allowed number of headers named 'name'. - - Called when a header is added to a Message object. 
If the returned - value is not 0 or None, and there are already a number of headers with - the name 'name' equal to the value returned, a ValueError is raised. - - Because the default behavior of Message's __setitem__ is to append the - value to the list of headers, it is easy to create duplicate headers - without realizing it. This method allows certain headers to be limited - in the number of instances of that header that may be added to a - Message programmatically. (The limit is not observed by the parser, - which will faithfully produce as many headers as exist in the message - being parsed.) - - The default implementation returns None for all header names. - """ - return None - - @abc.abstractmethod - def header_source_parse(self, sourcelines): - """Given a list of linesep terminated strings constituting the lines of - a single header, return the (name, value) tuple that should be stored - in the model. The input lines should retain their terminating linesep - characters. The lines passed in by the email package may contain - surrogateescaped binary data. - """ - raise NotImplementedError - - @abc.abstractmethod - def header_store_parse(self, name, value): - """Given the header name and the value provided by the application - program, return the (name, value) that should be stored in the model. - """ - raise NotImplementedError - - @abc.abstractmethod - def header_fetch_parse(self, name, value): - """Given the header name and the value from the model, return the value - to be returned to the application program that is requesting that - header. The value passed in by the email package may contain - surrogateescaped binary data if the lines were parsed by a BytesParser. - The returned value should not contain any surrogateescaped data. 
- - """ - raise NotImplementedError - - @abc.abstractmethod - def fold(self, name, value): - """Given the header name and the value from the model, return a string - containing linesep characters that implement the folding of the header - according to the policy controls. The value passed in by the email - package may contain surrogateescaped binary data if the lines were - parsed by a BytesParser. The returned value should not contain any - surrogateescaped data. - - """ - raise NotImplementedError - - @abc.abstractmethod - def fold_binary(self, name, value): - """Given the header name and the value from the model, return binary - data containing linesep characters that implement the folding of the - header according to the policy controls. The value passed in by the - email package may contain surrogateescaped binary data. - - """ - raise NotImplementedError - - -@_extend_docstrings -class Compat32(Policy): - - """+ - This particular policy is the backward compatibility Policy. It - replicates the behavior of the email package version 5.1. - """ - - def _sanitize_header(self, name, value): - # If the header value contains surrogates, return a Header using - # the unknown-8bit charset to encode the bytes as encoded words. - if not isinstance(value, str): - # Assume it is already a header object - return value - if _has_surrogates(value): - return header.Header(value, charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - return value - - def header_source_parse(self, sourcelines): - """+ - The name is parsed as everything up to the ':' and returned unmodified. - The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and - stripping any trailing carriage return or linefeed characters. 
- - """ - name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) - return (name, value.rstrip('\r\n')) - - def header_store_parse(self, name, value): - """+ - The name and value are returned unmodified. - """ - return (name, value) - - def header_fetch_parse(self, name, value): - """+ - If the value contains binary data, it is converted into a Header object - using the unknown-8bit charset. Otherwise it is returned unmodified. - """ - return self._sanitize_header(name, value) - - def fold(self, name, value): - """+ - Headers are folded using the Header folding algorithm, which preserves - existing line breaks in the value, and wraps each resulting line to the - max_line_length. Non-ASCII binary data are CTE encoded using the - unknown-8bit charset. - - """ - return self._fold(name, value, sanitize=True) - - def fold_binary(self, name, value): - """+ - Headers are folded using the Header folding algorithm, which preserves - existing line breaks in the value, and wraps each resulting line to the - max_line_length. If cte_type is 7bit, non-ascii binary data is CTE - encoded using the unknown-8bit charset. Otherwise the original source - header is used, with its existing line breaks and/or binary data. - - """ - folded = self._fold(name, value, sanitize=self.cte_type=='7bit') - return folded.encode('ascii', 'surrogateescape') - - def _fold(self, name, value, sanitize): - parts = [] - parts.append('%s: ' % name) - if isinstance(value, str): - if _has_surrogates(value): - if sanitize: - h = header.Header(value, - charset=_charset.UNKNOWN8BIT, - header_name=name) - else: - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. 
There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - parts.append(value) - h = None - else: - h = header.Header(value, header_name=name) - else: - # Assume it is a Header-like object. - h = value - if h is not None: - parts.append(h.encode(linesep=self.linesep, - maxlinelen=self.max_line_length)) - parts.append(self.linesep) - return ''.join(parts) - - -compat32 = Compat32() diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/base64mime.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/base64mime.py deleted file mode 100644 index 416d612..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/base64mime.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (C) 2002-2007 Python Software Foundation -# Author: Ben Gertzfield -# Contact: email-sig@python.org - -"""Base64 content transfer encoding per RFCs 2045-2047. - -This module handles the content transfer encoding method defined in RFC 2045 -to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit -characters encoding known as Base64. - -It is used in the MIME standards for email to attach images, audio, and text -using some 8-bit character sets to messages. - -This module provides an interface to encode and decode both headers and bodies -with Base64 encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:, From:, Cc:, etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character conversion -necessary for proper internationalized headers; it only does dumb encoding and -decoding. To deal with the various line wrapping issues, use the email.header -module. 
-""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import range -from future.builtins import bytes - -__all__ = [ - 'body_decode', - 'body_encode', - 'decode', - 'decodestring', - 'header_encode', - 'header_length', - ] - - -from base64 import b64encode -from binascii import b2a_base64, a2b_base64 - -CRLF = '\r\n' -NL = '\n' -EMPTYSTRING = '' - -# See also Charset.py -MISC_LEN = 7 - - -# Helpers -def header_length(bytearray): - """Return the length of s when it is encoded with base64.""" - groups_of_3, leftover = divmod(len(bytearray), 3) - # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. - n = groups_of_3 * 4 - if leftover: - n += 4 - return n - - -def header_encode(header_bytes, charset='iso-8859-1'): - """Encode a single header line with Base64 encoding in a given charset. - - charset names the character set to use to encode the header. It defaults - to iso-8859-1. Base64 encoding is defined in RFC 2045. - """ - if not header_bytes: - return "" - if isinstance(header_bytes, str): - header_bytes = header_bytes.encode(charset) - encoded = b64encode(header_bytes).decode("ascii") - return '=?%s?b?%s?=' % (charset, encoded) - - -def body_encode(s, maxlinelen=76, eol=NL): - r"""Encode a string with base64. - - Each line will be wrapped at, at most, maxlinelen characters (defaults to - 76 characters). - - Each line of encoded text will end with eol, which defaults to "\n". Set - this to "\r\n" if you will be using the result of this function directly - in an email. - """ - if not s: - return s - - encvec = [] - max_unencoded = maxlinelen * 3 // 4 - for i in range(0, len(s), max_unencoded): - # BAW: should encode() inherit b2a_base64()'s dubious behavior in - # adding a newline to the encoded string? 
- enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") - if enc.endswith(NL) and eol != NL: - enc = enc[:-1] + eol - encvec.append(enc) - return EMPTYSTRING.join(encvec) - - -def decode(string): - """Decode a raw base64 string, returning a bytes object. - - This function does not parse a full MIME header value encoded with - base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high - level email.header class for that functionality. - """ - if not string: - return bytes() - elif isinstance(string, str): - return a2b_base64(string.encode('raw-unicode-escape')) - else: - return a2b_base64(string) - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/charset.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/charset.py deleted file mode 100644 index 2385ce6..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/charset.py +++ /dev/null @@ -1,409 +0,0 @@ -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import str -from future.builtins import next - -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: email-sig@python.org - -__all__ = [ - 'Charset', - 'add_alias', - 'add_charset', - 'add_codec', - ] - -from functools import partial - -from future.backports import email -from future.backports.email import errors -from future.backports.email.encoders import encode_7or8bit - - -# Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers - -# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 -RFC2047_CHROME_LEN = 7 - -DEFAULT_CHARSET = 'us-ascii' -UNKNOWN8BIT = 'unknown-8bit' -EMPTYSTRING = '' - - -# Defaults -CHARSETS = { - # input header enc 
body enc output conv - 'iso-8859-1': (QP, QP, None), - 'iso-8859-2': (QP, QP, None), - 'iso-8859-3': (QP, QP, None), - 'iso-8859-4': (QP, QP, None), - # iso-8859-5 is Cyrillic, and not especially used - # iso-8859-6 is Arabic, also not particularly used - # iso-8859-7 is Greek, QP will not make it readable - # iso-8859-8 is Hebrew, QP will not make it readable - 'iso-8859-9': (QP, QP, None), - 'iso-8859-10': (QP, QP, None), - # iso-8859-11 is Thai, QP will not make it readable - 'iso-8859-13': (QP, QP, None), - 'iso-8859-14': (QP, QP, None), - 'iso-8859-15': (QP, QP, None), - 'iso-8859-16': (QP, QP, None), - 'windows-1252':(QP, QP, None), - 'viscii': (QP, QP, None), - 'us-ascii': (None, None, None), - 'big5': (BASE64, BASE64, None), - 'gb2312': (BASE64, BASE64, None), - 'euc-jp': (BASE64, None, 'iso-2022-jp'), - 'shift_jis': (BASE64, None, 'iso-2022-jp'), - 'iso-2022-jp': (BASE64, None, None), - 'koi8-r': (BASE64, BASE64, None), - 'utf-8': (SHORTEST, BASE64, 'utf-8'), - } - -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', - 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - - -# Map charsets to their Unicode codec strings. 
-CODEC_MAP = { - 'gb2312': 'eucgb2312_cn', - 'big5': 'big5_tw', - # Hack: We don't want *any* conversion for stuff marked us-ascii, as all - # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. - # Let that stuff pass through without conversion to/from Unicode. - 'us-ascii': None, - } - - -# Convenience functions for extending the above mappings -def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): - """Add character set properties to the global registry. - - charset is the input character set, and must be the canonical name of a - character set. - - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for - the shortest of qp or base64 encoding, or None for no encoding. SHORTEST - is only valid for header_enc. It describes how message headers and - message bodies in the input charset are to be encoded. Default is no - encoding. - - Optional output_charset is the character set that the output should be - in. Conversions will proceed from input charset, to Unicode, to the - output charset when the method Charset.convert() is called. The default - is to output in the same character set as the input. - - Both input_charset and output_charset must have Unicode codec entries in - the module's charset-to-codec mapping; use add_codec(charset, codecname) - to add codecs the module does not know about. See the codecs module's - documentation for more information. - """ - if body_enc == SHORTEST: - raise ValueError('SHORTEST not allowed for body_enc') - CHARSETS[charset] = (header_enc, body_enc, output_charset) - - -def add_alias(alias, canonical): - """Add a character set alias. - - alias is the alias name, e.g. latin-1 - canonical is the character set's canonical name, e.g. iso-8859-1 - """ - ALIASES[alias] = canonical - - -def add_codec(charset, codecname): - """Add a codec that map characters in the given charset to/from Unicode. 
- - charset is the canonical name of a character set. codecname is the name - of a Python codec, as appropriate for the second argument to the unicode() - built-in, or to the encode() method of a Unicode string. - """ - CODEC_MAP[charset] = codecname - - -# Convenience function for encoding strings, taking into account -# that they might be unknown-8bit (ie: have surrogate-escaped bytes) -def _encode(string, codec): - string = str(string) - if codec == UNKNOWN8BIT: - return string.encode('ascii', 'surrogateescape') - else: - return string.encode(codec) - - -class Charset(object): - """Map character sets to their email properties. - - This class provides information about the requirements imposed on email - for a specific character set. It also provides convenience routines for - converting between character sets, given the availability of the - applicable codecs. Given a character set, it will do its best to provide - information on how to use that character set in an email in an - RFC-compliant way. - - Certain character sets must be encoded with quoted-printable or base64 - when used in email headers or bodies. Certain character sets must be - converted outright, and are not allowed in email. Instances of this - module expose the following information about a character set: - - input_charset: The initial character set specified. Common aliases - are converted to their `official' email names (e.g. latin_1 - is converted to iso-8859-1). Defaults to 7-bit us-ascii. - - header_encoding: If the character set must be encoded before it can be - used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of - QP or BASE64 encoding. Otherwise, it will be None. - - body_encoding: Same as header_encoding, but describes the encoding for the - mail message's body, which indeed may be different than the - header encoding. 
Charset.SHORTEST is not allowed for - body_encoding. - - output_charset: Some character sets must be converted before they can be - used in email headers or bodies. If the input_charset is - one of them, this attribute will contain the name of the - charset output will be converted to. Otherwise, it will - be None. - - input_codec: The name of the Python codec used to convert the - input_charset to Unicode. If no conversion codec is - necessary, this attribute will be None. - - output_codec: The name of the Python codec used to convert Unicode - to the output_charset. If no conversion codec is necessary, - this attribute will have the same value as the input_codec. - """ - def __init__(self, input_charset=DEFAULT_CHARSET): - # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to - # unicode because its .lower() is locale insensitive. If the argument - # is already a unicode, we leave it at that, but ensure that the - # charset is ASCII, as the standard (RFC XXX) requires. - try: - if isinstance(input_charset, str): - input_charset.encode('ascii') - else: - input_charset = str(input_charset, 'ascii') - except UnicodeError: - raise errors.CharsetError(input_charset) - input_charset = input_charset.lower() - # Set the input charset after filtering through the aliases - self.input_charset = ALIASES.get(input_charset, input_charset) - # We can try to guess which encoding and conversion to use by the - # charset_map dictionary. Try that first, but let the user override - # it. - henc, benc, conv = CHARSETS.get(self.input_charset, - (SHORTEST, BASE64, None)) - if not conv: - conv = self.input_charset - # Set the attributes, allowing the arguments to override the default. - self.header_encoding = henc - self.body_encoding = benc - self.output_charset = ALIASES.get(conv, conv) - # Now set the codecs. If one isn't defined for input_charset, - # guess and try a Unicode codec with the same name as input_codec. 
- self.input_codec = CODEC_MAP.get(self.input_charset, - self.input_charset) - self.output_codec = CODEC_MAP.get(self.output_charset, - self.output_charset) - - def __str__(self): - return self.input_charset.lower() - - __repr__ = __str__ - - def __eq__(self, other): - return str(self) == str(other).lower() - - def __ne__(self, other): - return not self.__eq__(other) - - def get_body_encoding(self): - """Return the content-transfer-encoding used for body encoding. - - This is either the string `quoted-printable' or `base64' depending on - the encoding used, or it is a function in which case you should call - the function with a single argument, the Message object being - encoded. The function should then set the Content-Transfer-Encoding - header itself to whatever is appropriate. - - Returns "quoted-printable" if self.body_encoding is QP. - Returns "base64" if self.body_encoding is BASE64. - Returns conversion function otherwise. - """ - assert self.body_encoding != SHORTEST - if self.body_encoding == QP: - return 'quoted-printable' - elif self.body_encoding == BASE64: - return 'base64' - else: - return encode_7or8bit - - def get_output_charset(self): - """Return the output character set. - - This is self.output_charset if that is not None, otherwise it is - self.input_charset. - """ - return self.output_charset or self.input_charset - - def header_encode(self, string): - """Header-encode a string by converting it first to bytes. - - The type of encoding (base64 or quoted-printable) will be based on - this charset's `header_encoding`. - - :param string: A unicode string for the header. It must be possible - to encode this string to bytes using the character set's - output codec. - :return: The encoded string, with RFC 2047 chrome. 
- """ - codec = self.output_codec or 'us-ascii' - header_bytes = _encode(string, codec) - # 7bit/8bit encodings return the string unchanged (modulo conversions) - encoder_module = self._get_encoder(header_bytes) - if encoder_module is None: - return string - return encoder_module.header_encode(header_bytes, codec) - - def header_encode_lines(self, string, maxlengths): - """Header-encode a string by converting it first to bytes. - - This is similar to `header_encode()` except that the string is fit - into maximum line lengths as given by the argument. - - :param string: A unicode string for the header. It must be possible - to encode this string to bytes using the character set's - output codec. - :param maxlengths: Maximum line length iterator. Each element - returned from this iterator will provide the next maximum line - length. This parameter is used as an argument to built-in next() - and should never be exhausted. The maximum line lengths should - not count the RFC 2047 chrome. These line lengths are only a - hint; the splitter does the best it can. - :return: Lines of encoded strings, each with RFC 2047 chrome. - """ - # See which encoding we should use. - codec = self.output_codec or 'us-ascii' - header_bytes = _encode(string, codec) - encoder_module = self._get_encoder(header_bytes) - encoder = partial(encoder_module.header_encode, charset=codec) - # Calculate the number of characters that the RFC 2047 chrome will - # contribute to each line. - charset = self.get_output_charset() - extra = len(charset) + RFC2047_CHROME_LEN - # Now comes the hard part. We must encode bytes but we can't split on - # bytes because some character sets are variable length and each - # encoded word must stand on its own. So the problem is you have to - # encode to bytes to figure out this word's length, but you must split - # on characters. 
This causes two problems: first, we don't know how - # many octets a specific substring of unicode characters will get - # encoded to, and second, we don't know how many ASCII characters - # those octets will get encoded to. Unless we try it. Which seems - # inefficient. In the interest of being correct rather than fast (and - # in the hope that there will be few encoded headers in any such - # message), brute force it. :( - lines = [] - current_line = [] - maxlen = next(maxlengths) - extra - for character in string: - current_line.append(character) - this_line = EMPTYSTRING.join(current_line) - length = encoder_module.header_length(_encode(this_line, charset)) - if length > maxlen: - # This last character doesn't fit so pop it off. - current_line.pop() - # Does nothing fit on the first line? - if not lines and not current_line: - lines.append(None) - else: - separator = (' ' if lines else '') - joined_line = EMPTYSTRING.join(current_line) - header_bytes = _encode(joined_line, codec) - lines.append(encoder(header_bytes)) - current_line = [character] - maxlen = next(maxlengths) - extra - joined_line = EMPTYSTRING.join(current_line) - header_bytes = _encode(joined_line, codec) - lines.append(encoder(header_bytes)) - return lines - - def _get_encoder(self, header_bytes): - if self.header_encoding == BASE64: - return email.base64mime - elif self.header_encoding == QP: - return email.quoprimime - elif self.header_encoding == SHORTEST: - len64 = email.base64mime.header_length(header_bytes) - lenqp = email.quoprimime.header_length(header_bytes) - if len64 < lenqp: - return email.base64mime - else: - return email.quoprimime - else: - return None - - def body_encode(self, string): - """Body-encode a string by converting it first to bytes. - - The type of encoding (base64 or quoted-printable) will be based on - self.body_encoding. 
If body_encoding is None, we assume the - output charset is a 7bit encoding, so re-encoding the decoded - string using the ascii codec produces the correct string version - of the content. - """ - if not string: - return string - if self.body_encoding is BASE64: - if isinstance(string, str): - string = string.encode(self.output_charset) - return email.base64mime.body_encode(string) - elif self.body_encoding is QP: - # quopromime.body_encode takes a string, but operates on it as if - # it were a list of byte codes. For a (minimal) history on why - # this is so, see changeset 0cf700464177. To correctly encode a - # character set, then, we must turn it into pseudo bytes via the - # latin1 charset, which will encode any byte as a single code point - # between 0 and 255, which is what body_encode is expecting. - if isinstance(string, str): - string = string.encode(self.output_charset) - string = string.decode('latin1') - return email.quoprimime.body_encode(string) - else: - if isinstance(string, str): - string = string.encode(self.output_charset).decode('ascii') - return string diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/encoders.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/encoders.py deleted file mode 100644 index 15d2eb4..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/encoders.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Encodings and related functions.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import str - -__all__ = [ - 'encode_7or8bit', - 'encode_base64', - 'encode_noop', - 'encode_quopri', - ] - - -try: - from base64 import encodebytes as _bencode -except ImportError: - # Py2 compatibility. TODO: test this! 
- from base64 import encodestring as _bencode -from quopri import encodestring as _encodestring - - -def _qencode(s): - enc = _encodestring(s, quotetabs=True) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') - - -def encode_base64(msg): - """Encode the message's payload in Base64. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = str(_bencode(orig), 'ascii') - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'base64' - - -def encode_quopri(msg): - """Encode the message's payload in quoted-printable. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _qencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'quoted-printable' - - -def encode_7or8bit(msg): - """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" - orig = msg.get_payload() - if orig is None: - # There's no payload. For backwards compatibility we use 7bit - msg['Content-Transfer-Encoding'] = '7bit' - return - # We play a trick to make this go fast. If encoding/decode to ASCII - # succeeds, we know the data must be 7bit, otherwise treat it as 8bit. 
- try: - if isinstance(orig, str): - orig.encode('ascii') - else: - orig.decode('ascii') - except UnicodeError: - charset = msg.get_charset() - output_cset = charset and charset.output_charset - # iso-2022-* is non-ASCII but encodes to a 7-bit representation - if output_cset and output_cset.lower().startswith('iso-2022-'): - msg['Content-Transfer-Encoding'] = '7bit' - else: - msg['Content-Transfer-Encoding'] = '8bit' - else: - msg['Content-Transfer-Encoding'] = '7bit' - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) - - -def encode_noop(msg): - """Do nothing.""" - # Well, not quite *nothing*: in Python3 we have to turn bytes into a string - # in our internal surrogateescaped form in order to keep the model - # consistent. - orig = msg.get_payload() - if not isinstance(orig, str): - msg.set_payload(orig.decode('ascii', 'surrogateescape')) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/errors.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/errors.py deleted file mode 100644 index 0fe599c..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/errors.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""email package exception classes.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import super - - -class MessageError(Exception): - """Base class for errors in the email package.""" - - -class MessageParseError(MessageError): - """Base class for message parsing errors.""" - - -class HeaderParseError(MessageParseError): - """Error while parsing headers.""" - - -class BoundaryError(MessageParseError): - """Couldn't find terminating boundary.""" - - -class MultipartConversionError(MessageError, TypeError): - """Conversion to a multipart is prohibited.""" - - -class 
CharsetError(MessageError): - """An illegal charset was given.""" - - -# These are parsing defects which the parser was able to work around. -class MessageDefect(ValueError): - """Base class for a message defect.""" - - def __init__(self, line=None): - if line is not None: - super().__init__(line) - self.line = line - -class NoBoundaryInMultipartDefect(MessageDefect): - """A message claimed to be a multipart but had no boundary parameter.""" - -class StartBoundaryNotFoundDefect(MessageDefect): - """The claimed start boundary was never found.""" - -class CloseBoundaryNotFoundDefect(MessageDefect): - """A start boundary was found, but not the corresponding close boundary.""" - -class FirstHeaderLineIsContinuationDefect(MessageDefect): - """A message had a continuation line as its first header line.""" - -class MisplacedEnvelopeHeaderDefect(MessageDefect): - """A 'Unix-from' header was found in the middle of a header block.""" - -class MissingHeaderBodySeparatorDefect(MessageDefect): - """Found line with no leading whitespace and no colon before blank line.""" -# XXX: backward compatibility, just in case (it was never emitted). -MalformedHeaderDefect = MissingHeaderBodySeparatorDefect - -class MultipartInvariantViolationDefect(MessageDefect): - """A message claimed to be a multipart but no subparts were found.""" - -class InvalidMultipartContentTransferEncodingDefect(MessageDefect): - """An invalid content transfer encoding was set on the multipart itself.""" - -class UndecodableBytesDefect(MessageDefect): - """Header contained bytes that could not be decoded""" - -class InvalidBase64PaddingDefect(MessageDefect): - """base64 encoded sequence had an incorrect length""" - -class InvalidBase64CharactersDefect(MessageDefect): - """base64 encoded sequence had characters not in base64 alphabet""" - -# These errors are specific to header parsing. 
- -class HeaderDefect(MessageDefect): - """Base class for a header defect.""" - - def __init__(self, *args, **kw): - super().__init__(*args, **kw) - -class InvalidHeaderDefect(HeaderDefect): - """Header is not valid, message gives details.""" - -class HeaderMissingRequiredValue(HeaderDefect): - """A header that must have a value had none""" - -class NonPrintableDefect(HeaderDefect): - """ASCII characters outside the ascii-printable range found""" - - def __init__(self, non_printables): - super().__init__(non_printables) - self.non_printables = non_printables - - def __str__(self): - return ("the following ASCII non-printables found in header: " - "{}".format(self.non_printables)) - -class ObsoleteHeaderDefect(HeaderDefect): - """Header uses syntax declared obsolete by RFC 5322""" - -class NonASCIILocalPartDefect(HeaderDefect): - """local_part contains non-ASCII characters""" - # This defect only occurs during unicode parsing, not when - # parsing messages decoded from binary. diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/feedparser.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/feedparser.py deleted file mode 100644 index 935c26e..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/feedparser.py +++ /dev/null @@ -1,525 +0,0 @@ -# Copyright (C) 2004-2006 Python Software Foundation -# Authors: Baxter, Wouters and Warsaw -# Contact: email-sig@python.org - -"""FeedParser - An email feed parser. - -The feed parser implements an interface for incrementally parsing an email -message, line by line. This has advantages for certain applications, such as -those reading email messages off a socket. - -FeedParser.feed() is the primary interface for pushing new data into the -parser. It returns when there's nothing more it can do with the available -data. When you have no more data to push into the parser, call .close(). -This completes the parsing and returns the root message object. 
- -The other advantage of this parser is that it will never raise a parsing -exception. Instead, when it finds something unexpected, it adds a 'defect' to -the current message. Defects are just instances that live on the message -object's .defects attribute. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import object, range, super -from future.utils import implements_iterator, PY3 - -__all__ = ['FeedParser', 'BytesFeedParser'] - -import re - -from future.backports.email import errors -from future.backports.email import message -from future.backports.email._policybase import compat32 - -NLCRE = re.compile('\r\n|\r|\n') -NLCRE_bol = re.compile('(\r\n|\r|\n)') -NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') -NLCRE_crack = re.compile('(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character -# except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') -EMPTYSTRING = '' -NL = '\n' - -NeedMoreData = object() - - -# @implements_iterator -class BufferedSubFile(object): - """A file-ish object that can have new data loaded into it. - - You can also push and pop line-matching predicates onto a stack. When the - current predicate matches the current line, a false EOF response - (i.e. empty string) is returned instead. This lets the parser adhere to a - simple abstraction -- it parses until EOF closes the current message. - """ - def __init__(self): - # The last partial line pushed into this object. - self._partial = '' - # The list of full, pushed lines, in reverse order - self._lines = [] - # The stack of false-EOF checking predicates. - self._eofstack = [] - # A flag indicating whether the file has been closed or not. 
- self._closed = False - - def push_eof_matcher(self, pred): - self._eofstack.append(pred) - - def pop_eof_matcher(self): - return self._eofstack.pop() - - def close(self): - # Don't forget any trailing partial line. - self._lines.append(self._partial) - self._partial = '' - self._closed = True - - def readline(self): - if not self._lines: - if self._closed: - return '' - return NeedMoreData - # Pop the line off the stack and see if it matches the current - # false-EOF predicate. - line = self._lines.pop() - # RFC 2046, section 5.1.2 requires us to recognize outer level - # boundaries at any level of inner nesting. Do this, but be sure it's - # in the order of most to least nested. - for ateof in self._eofstack[::-1]: - if ateof(line): - # We're at the false EOF. But push the last line back first. - self._lines.append(line) - return '' - return line - - def unreadline(self, line): - # Let the consumer push a line back into the buffer. - assert line is not NeedMoreData - self._lines.append(line) - - def push(self, data): - """Push some new data into this object.""" - # Handle any previous leftovers - data, self._partial = self._partial + data, '' - # Crack into lines, but preserve the newlines on the end of each - parts = NLCRE_crack.split(data) - # The *ahem* interesting behaviour of re.split when supplied grouping - # parentheses is that the last element of the resulting list is the - # data after the final RE. In the case of a NL/CR terminated string, - # this is the empty string. - self._partial = parts.pop() - #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r: - # is there a \n to follow later? - if not self._partial and parts and parts[-1].endswith('\r'): - self._partial = parts.pop(-2)+parts.pop() - # parts is a list of strings, alternating between the line contents - # and the eol character(s). Gather up a list of lines after - # re-attaching the newlines. 
- lines = [] - for i in range(len(parts) // 2): - lines.append(parts[i*2] + parts[i*2+1]) - self.pushlines(lines) - - def pushlines(self, lines): - # Reverse and insert at the front of the lines. - self._lines[:0] = lines[::-1] - - def __iter__(self): - return self - - def __next__(self): - line = self.readline() - if line == '': - raise StopIteration - return line - - -class FeedParser(object): - """A feed-style parser of email.""" - - def __init__(self, _factory=message.Message, **_3to2kwargs): - if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - else: policy = compat32 - """_factory is called with no arguments to create a new message obj - - The policy keyword specifies a policy object that controls a number of - aspects of the parser's operation. The default policy maintains - backward compatibility. - - """ - self._factory = _factory - self.policy = policy - try: - _factory(policy=self.policy) - self._factory_kwds = lambda: {'policy': self.policy} - except TypeError: - # Assume this is an old-style factory - self._factory_kwds = lambda: {} - self._input = BufferedSubFile() - self._msgstack = [] - if PY3: - self._parse = self._parsegen().__next__ - else: - self._parse = self._parsegen().next - self._cur = None - self._last = None - self._headersonly = False - - # Non-public interface for supporting Parser's headersonly flag - def _set_headersonly(self): - self._headersonly = True - - def feed(self, data): - """Push more data into the parser.""" - self._input.push(data) - self._call_parse() - - def _call_parse(self): - try: - self._parse() - except StopIteration: - pass - - def close(self): - """Parse all remaining data and return the root message object.""" - self._input.close() - self._call_parse() - root = self._pop_message() - assert not self._msgstack - # Look for final set of defects - if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): - defect = errors.MultipartInvariantViolationDefect() - 
self.policy.handle_defect(root, defect) - return root - - def _new_message(self): - msg = self._factory(**self._factory_kwds()) - if self._cur and self._cur.get_content_type() == 'multipart/digest': - msg.set_default_type('message/rfc822') - if self._msgstack: - self._msgstack[-1].attach(msg) - self._msgstack.append(msg) - self._cur = msg - self._last = msg - - def _pop_message(self): - retval = self._msgstack.pop() - if self._msgstack: - self._cur = self._msgstack[-1] - else: - self._cur = None - return retval - - def _parsegen(self): - # Create a new message and start by parsing headers. - self._new_message() - headers = [] - # Collect the headers, searching for a line that doesn't match the RFC - # 2822 header or continuation pattern (including an empty line). - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - if not headerRE.match(line): - # If we saw the RFC defined header/body separator - # (i.e. newline), just throw it away. Otherwise the line is - # part of the body so push it back. - if not NLCRE.match(line): - defect = errors.MissingHeaderBodySeparatorDefect() - self.policy.handle_defect(self._cur, defect) - self._input.unreadline(line) - break - headers.append(line) - # Done with the headers, so parse them and figure out what we're - # supposed to see in the body of the message. - self._parse_headers(headers) - # Headers-only parsing is a backwards compatibility hack, which was - # necessary in the older parser, which could raise errors. All - # remaining lines in the input are thrown into the message body. - if self._headersonly: - lines = [] - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - if self._cur.get_content_type() == 'message/delivery-status': - # message/delivery-status contains blocks of headers separated by - # a blank line. 
We'll represent each header block as a separate - # nested message object, but the processing is a bit different - # than standard message/* types because there is no body for the - # nested messages. A blank line separates the subparts. - while True: - self._input.push_eof_matcher(NLCRE.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - msg = self._pop_message() - # We need to pop the EOF matcher in order to tell if we're at - # the end of the current file, not the end of the last block - # of message headers. - self._input.pop_eof_matcher() - # The input stream must be sitting at the newline or at the - # EOF. We want to see if we're at the end of this subpart, so - # first consume the blank line, then test the next line to see - # if we're at this subpart's EOF. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - if line == '': - break - # Not at EOF so this is a line we're going to need. - self._input.unreadline(line) - return - if self._cur.get_content_maintype() == 'message': - # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - self._pop_message() - return - if self._cur.get_content_maintype() == 'multipart': - boundary = self._cur.get_boundary() - if boundary is None: - # The message /claims/ to be a multipart but it has not - # defined a boundary. That's a problem which we'll handle by - # reading everything until the EOF and marking the message as - # defective. 
- defect = errors.NoBoundaryInMultipartDefect() - self.policy.handle_defect(self._cur, defect) - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - # Make sure a valid content type was specified per RFC 2045:6.4. - if (self._cur.get('content-transfer-encoding', '8bit').lower() - not in ('7bit', '8bit', 'binary')): - defect = errors.InvalidMultipartContentTransferEncodingDefect() - self.policy.handle_defect(self._cur, defect) - # Create a line match predicate which matches the inter-part - # boundary as well as the end-of-multipart boundary. Don't push - # this onto the input stream until we've scanned past the - # preamble. - separator = '--' + boundary - boundaryre = re.compile( - '(?P<sep>' + re.escape(separator) + - r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') - capturing_preamble = True - preamble = [] - linesep = False - close_boundary_seen = False - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - mo = boundaryre.match(line) - if mo: - # If we're looking at the end boundary, we're done with - # this multipart. If there was a newline at the end of - # the closing boundary, then we need to initialize the - # epilogue with the empty string (see below). - if mo.group('end'): - close_boundary_seen = True - linesep = mo.group('linesep') - break - # We saw an inter-part boundary. Were we in the preamble? - if capturing_preamble: - if preamble: - # According to RFC 2046, the last newline belongs - # to the boundary. - lastline = preamble[-1] - eolmo = NLCRE_eol.search(lastline) - if eolmo: - preamble[-1] = lastline[:-len(eolmo.group(0))] - self._cur.preamble = EMPTYSTRING.join(preamble) - capturing_preamble = False - self._input.unreadline(line) - continue - # We saw a boundary separating two parts. 
Consume any - # multiple boundary lines that may be following. Our - # interpretation of RFC 2046 BNF grammar does not produce - # body parts within such double boundaries. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - mo = boundaryre.match(line) - if not mo: - self._input.unreadline(line) - break - # Recurse to parse this subpart; the input stream points - # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - # Because of RFC 2046, the newline preceding the boundary - # separator actually belongs to the boundary, not the - # previous subpart's payload (or epilogue if the previous - # part is a multipart). - if self._last.get_content_maintype() == 'multipart': - epilogue = self._last.epilogue - if epilogue == '': - self._last.epilogue = None - elif epilogue is not None: - mo = NLCRE_eol.search(epilogue) - if mo: - end = len(mo.group(0)) - self._last.epilogue = epilogue[:-end] - else: - payload = self._last._payload - if isinstance(payload, str): - mo = NLCRE_eol.search(payload) - if mo: - payload = payload[:-len(mo.group(0))] - self._last._payload = payload - self._input.pop_eof_matcher() - self._pop_message() - # Set the multipart up for newline cleansing, which will - # happen if we're in a nested multipart. - self._last = self._cur - else: - # I think we must be in the preamble - assert capturing_preamble - preamble.append(line) - # We've seen either the EOF or the end boundary. If we're still - # capturing the preamble, we never saw the start boundary. Note - # that as a defect and store the captured text as the payload. 
- if capturing_preamble: - defect = errors.StartBoundaryNotFoundDefect() - self.policy.handle_defect(self._cur, defect) - self._cur.set_payload(EMPTYSTRING.join(preamble)) - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # If we're not processing the preamble, then we might have seen - # EOF without seeing that end boundary...that is also a defect. - if not close_boundary_seen: - defect = errors.CloseBoundaryNotFoundDefect() - self.policy.handle_defect(self._cur, defect) - return - # Everything from here to the EOF is epilogue. If the end boundary - # ended in a newline, we'll need to make sure the epilogue isn't - # None - if linesep: - epilogue = [''] - else: - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - epilogue.append(line) - # Any CRLF at the front of the epilogue is not technically part of - # the epilogue. Also, watch out for an empty string epilogue, - # which means a single newline. - if epilogue: - firstline = epilogue[0] - bolmo = NLCRE_bol.match(firstline) - if bolmo: - epilogue[0] = firstline[len(bolmo.group(0)):] - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # Otherwise, it's some non-multipart type, so the entire rest of the - # file contents becomes the payload. - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - - def _parse_headers(self, lines): - # Passed a list of lines that make up the headers for the current msg - lastheader = '' - lastvalue = [] - for lineno, line in enumerate(lines): - # Check for continuation - if line[0] in ' \t': - if not lastheader: - # The first line of the headers was a continuation. This - # is illegal, so let's note the defect, store the illegal - # line, and ignore it for purposes of headers. 
- defect = errors.FirstHeaderLineIsContinuationDefect(line) - self.policy.handle_defect(self._cur, defect) - continue - lastvalue.append(line) - continue - if lastheader: - self._cur.set_raw(*self.policy.header_source_parse(lastvalue)) - lastheader, lastvalue = '', [] - # Check for envelope header, i.e. unix-from - if line.startswith('From '): - if lineno == 0: - # Strip off the trailing newline - mo = NLCRE_eol.search(line) - if mo: - line = line[:-len(mo.group(0))] - self._cur.set_unixfrom(line) - continue - elif lineno == len(lines) - 1: - # Something looking like a unix-from at the end - it's - # probably the first line of the body, so push back the - # line and stop. - self._input.unreadline(line) - return - else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. - defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) - continue - # Split the line on the colon separating field name from value. - # There will always be a colon, because if there wasn't the part of - # the parser that calls us would have started parsing the body. - i = line.find(':') - assert i>0, "_parse_headers fed line with no : and no leading WS" - lastheader = line[:i] - lastvalue = [line] - # Done with all the lines, so handle the last header. 
class Generator(object):
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, **_3to2kwargs):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  When it is None (the default), flatten() does not
        override the max_line_length of the policy in effect.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  When it is None (the default),
        flatten() uses the policy of the message being flattened.
        """
        # 'policy' is keyword-only in the Python 3 original; the 3to2
        # conversion delivers it through **kwargs.  Any other keyword is
        # ignored, exactly as before.
        policy = _3to2kwargs.pop('policy', None)
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        # This used to assign the misspelled '_encoded_EMTPY', so the
        # attribute read by _handle_message_delivery_status() was never
        # refreshed here and only the class-level default was ever seen.
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            # Always restore both policies, even if writing failed.
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None, # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    # Similarly, we have _XXX and _encoded_XXX attributes that are used on
    # source and buffer data, respectively.
    _encoded_EMPTY = ''

    def _new_buffer(self):
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the text in lines, replacing each line ending with self._NL.

        A final line without a line ending is written without one.
        """
        # We have to transform the line endings.
        if not lines:
            return
        lines = lines.splitlines(True)
        for line in lines[:-1]:
            self.write(line.rstrip('\r\n'))
            self.write(self._NL)
        laststripped = lines[-1].rstrip('\r\n')
        self.write(laststripped)
        # Only terminate the last line if the input terminated it.
        if len(lines[-1]) != len(laststripped):
            self.write(self._NL)

    def _write(self, msg):
        """Write the headers of msg followed by its body."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every raw header of msg, folded by the current policy."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, mangling From_ lines when enabled.

        Raises TypeError if the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # Re-set the payload with its declared charset; the old
                # Content-Transfer-Encoding header is dropped first.
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, a '.<counter>' suffix is appended until the
        boundary no longer occurs in text as a delimiter line.
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        # BytesGenerator overrides this to compile a bytes pattern.
        return re.compile(s, flags)
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Works like the Generator base class, but every non-text part is replaced
    by a line produced from a format string describing that part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__(), plus one extra optional argument, fmt.

        Every subpart of the message is visited.  For a subpart whose main
        type is `text', the payload of the subpart is printed.

        For any other non-multipart subpart, fmt is printed instead of the
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default), this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        # print(..., file=self) sends the text through self.write().
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Nothing is printed for the container itself.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Anything else is summarised with the format string.
            fields = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print(self._fmt % fields, file=self)
def decode_header(header):
    """Decode a message header value without converting charset.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object (anything with a `_chunks' attribute).

    Returns a list of (value, charset) pairs:

    * For a Header object, one pair per chunk, with the chunk re-encoded via
      the charset module and the charset given as a string.
    * For a string without any encoded word, the single pair (header, None),
      with the string returned unchanged.
    * Otherwise one pair per run of identically encoded words.  Words that
      are still text at that point are converted to bytes.  charset is None
      for unencoded runs and the lower-cased charset name for encoded ones.

    An email.errors.HeaderParseError is raised when a base64 encoded word
    cannot be decoded.
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre has three groups, so split() yields
        # [text, charset, encoding, encoded, text, ...].
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    # Delete from the back so the remaining indexes stay valid.
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            # Adjacent unencoded words are re-joined with a single space.
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
def __init__(self, s=None, charset=None,
             maxlinelen=None, header_name=None,
             continuation_ws=' ', errors='strict'):
    """Create a MIME-compliant header that can contain many character sets.

    s is the optional initial header value.  When it is None nothing is
    stored yet; more text can be added later with .append().  s may be a
    byte string or a Unicode string; see .append() for the semantics.

    charset plays two roles: it is the charset used for s, and it becomes
    the default for every later .append() call that omits its own charset.
    It may be a Charset instance or a charset name.  When it is None, the
    us-ascii charset is used for both purposes.

    maxlinelen sets the maximum line length explicitly; when it is None the
    module default MAXLINELEN is used.  Pass the field name (e.g.
    `Subject') as header_name so that the first line can be split shorter
    to leave room for the field name, which is not part of s.

    continuation_ws must be RFC 2822 compliant folding whitespace (usually
    either a space or a hard tab) which will be prepended to continuation
    lines.

    errors is passed through to the .append() call.
    """
    if charset is None:
        default_charset = USASCII
    elif isinstance(charset, Charset):
        default_charset = charset
    else:
        default_charset = Charset(charset)
    self._charset = default_charset
    self._continuation_ws = continuation_ws
    self._chunks = []
    if s is not None:
        self.append(s, default_charset, errors)
    self._maxlinelen = MAXLINELEN if maxlinelen is None else maxlinelen
    # The "+ 2" accounts for the colon and space that separate the field
    # name from the value.
    self._headerlen = 0 if header_name is None else len(header_name) + 2
def append(self, s, charset=None, errors='strict'):
    """Append a string to the MIME header.

    charset may be a Charset instance or the name of a character set,
    which is converted to a Charset instance.  When it is None (the
    default), the charset given to the constructor is used.

    s may be a byte string or a Unicode string.  A byte string (that is,
    isinstance(s, str) is false) is decoded using the input codec of
    charset, falling back to us-ascii when the charset has none; errors is
    passed as the errors argument of that decode call, so a UnicodeError
    is raised if decoding fails in strict mode.  For a Unicode string,
    charset is a hint naming the character set of its characters.

    In both cases the text is test-encoded with the output codec of the
    charset so that an unencodable value is reported early.  If that codec
    is us-ascii and the text does not fit, the chunk is stored with the
    utf-8 charset instead; for any other codec the UnicodeError propagates.
    """
    if charset is None:
        cs = self._charset
    elif isinstance(charset, Charset):
        cs = charset
    else:
        cs = Charset(charset)

    text = s
    if not isinstance(text, str):
        codec_in = cs.input_codec or 'us-ascii'
        if codec_in == _charset.UNKNOWN8BIT:
            # Bytes of unknown encoding are kept as surrogate escapes.
            text = text.decode('us-ascii', 'surrogateescape')
        else:
            text = text.decode(codec_in, errors)

    # Make sure the text can be encoded to the output character set, so
    # that a problem is raised now rather than when the header is used.
    codec_out = cs.output_codec or 'us-ascii'
    if codec_out != _charset.UNKNOWN8BIT:
        try:
            text.encode(codec_out, errors)
        except UnicodeEncodeError:
            if codec_out != 'us-ascii':
                raise
            cs = UTF8
    self._chunks.append((text, cs))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
    r"""Encode a message header into an RFC-compliant format.

    There are many issues involved in converting a given string for use in
    an email header.  Only certain character sets are readable in most
    email clients, and as header strings can only contain a subset of
    7-bit ASCII, care must be taken to properly convert and encode (with
    Base64 or quoted-printable) header strings.  In addition, there is a
    75-character length limit on any given encoded header field, so
    line-wrapping must be performed, even with double-byte character sets.

    Optional maxlinelen specifies the maximum length of each generated
    line, exclusive of the linesep string.  Individual lines may be longer
    than maxlinelen if a folding point cannot be found.  The first line
    will be shorter by the length of the header name plus ": " if a header
    name was specified at Header construction time.  The default value for
    maxlinelen is determined at header construction time.  A value of 0
    disables wrapping.

    Optional splitchars is a string containing characters which should be
    given extra weight by the splitting algorithm during normal header
    wrapping.  This is in very rough support of RFC 2822's `higher level
    syntactic breaks':  split points preceded by a splitchar are preferred
    during line splitting, with the characters preferred in the order in
    which they appear in the string.  Space and tab may be included in the
    string to indicate whether preference should be given to one over the
    other as a split point when other split chars do not appear in the line
    being split.  Splitchars does not affect RFC 2047 encoded lines.

    Optional linesep is a string to be used to separate the lines of
    the value.  The default value is the most useful for typical
    Python applications, but it can be set to \r\n to produce RFC-compliant
    line separators when needed.

    Raises HeaderParseError if the encoded value matches the
    ``_embeded_header`` pattern, i.e. it appears to contain an embedded
    header.
    """
    self._normalize()
    if maxlinelen is None:
        maxlinelen = self._maxlinelen
    # A maxlinelen of 0 means don't wrap.  For all practical purposes,
    # choosing a huge number here accomplishes that and makes the
    # _ValueFormatter algorithm much simpler.
    if maxlinelen == 0:
        maxlinelen = 1000000
    formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                self._continuation_ws, splitchars)
    lastcs = None
    # hasspace stays None until the first chunk has been processed, which
    # is how the loop knows not to emit a transition before the first chunk.
    hasspace = lastspace = None
    for string, charset in self._chunks:
        if hasspace is not None:
            hasspace = string and self._nonctext(string[0])
            if lastcs not in (None, 'us-ascii'):
                if not hasspace or charset not in (None, 'us-ascii'):
                    formatter.add_transition()
            elif charset not in (None, 'us-ascii') and not lastspace:
                formatter.add_transition()
        lastspace = string and self._nonctext(string[-1])
        lastcs = charset
        hasspace = False
        lines = string.splitlines()
        # The first physical line continues whatever is on the current line;
        # an empty chunk is still fed so the formatter sees the charset.
        if lines:
            formatter.feed('', lines[0], charset)
        else:
            formatter.feed('', '', charset)
        for line in lines[1:]:
            formatter.newline()
            if charset.header_encoding is not None:
                formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                               charset)
            else:
                # Keep the line's own leading whitespace as the folding
                # whitespace for unencoded text.
                sline = line.lstrip()
                fws = line[:len(line)-len(sline)]
                formatter.feed(fws, sline, charset)
        if len(lines) > 1:
            formatter.newline()
    if self._chunks:
        formatter.add_transition()
    value = formatter._str(linesep)
    if _embeded_header.search(value):
        raise HeaderParseError("header value appears to contain "
            "an embedded header: {!r}".format(value))
    return value
class _ValueFormatter(object):
    """Accumulate header text into folded lines no longer than ``maxlen``.

    Text is pushed onto ``self._current_line`` (an ``_Accumulator`` of
    ``(fws, string)`` pairs); finished lines are collected as strings in
    ``self._lines``.
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        # headerlen is passed as the accumulator's initial size, so it counts
        # towards the length of the first line.
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the current line and join all lines with ``linesep``."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        return self._str(NL)

    def newline(self):
        """Finish the current line and start an empty one."""
        # Drop a trailing (' ', '') pair (as pushed by add_transition());
        # anything else that was popped is put back.
        end_of_line = self._current_line.pop()
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws():
                # Whitespace-only content is glued onto the previous line
                # rather than becoming a line of its own.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Push a single-space separator pair onto the current line."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Add ``string`` (preceded by ``fws``) using ``charset``'s rules."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible. Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        # The last encoded line stays open on the current line so that later
        # text can still be appended to it.
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the room left on the current line, then on every later line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Feed unencoded text to ``_append_chunk`` one FWS-delimited part at a time."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # Make the list alternate strictly as fws, part, fws, part, ...
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        # zip over the same iterator twice to walk the list in pairs.
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Push one ``(fws, string)`` pair and fold the line if it is now too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    # No split point for this splitchar; try the next one.
                    continue
                # A split point was found at index i; stop searching.
                break
            else:
                # No splitchar produced a split point.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            # Emit everything before the split point and carry the remainder
            # over to the new current line.
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)
class Address(object):
    """A single email address: display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.
        Instead of passing username and domain separately, they may be given
        together through the addr_spec keyword.  An addr_spec string must be
        properly quoted according to RFC 5322 rules; an error is raised if
        it is not.

        The resulting object exposes read-only display_name, username,
        domain, and addr_spec attributes.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username or domain,
        ValueError if addr_spec cannot be parsed completely, and the first
        parser defect if the parsed addr_spec has any.
        """
        # Only application code goes through this branch (and its potential
        # 'raise'); the email library itself always supplies username and
        # domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            if parsed.all_defects:
                raise parsed.all_defects[0]
            username, domain = parsed.local_part, parsed.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        local = self.username
        # Quote the local part if it contains any DOT_ATOM_ENDS character.
        if set(local) & parser.DOT_ATOM_ENDS:
            local = parser.quote_string(local)
        if self.domain:
            return local + '@' + self.domain
        return local if local else '<>'

    def __repr__(self):
        template = "Address(display_name={!r}, username={!r}, domain={!r})"
        return template.format(self.display_name, self.username, self.domain)

    def __str__(self):
        shown = self.display_name
        # Quote the display name if it contains any RFC special character.
        if set(shown) & parser.SPECIALS:
            shown = parser.quote_string(shown)
        spec = self.addr_spec
        if not shown:
            return spec
        # An empty address is rendered as "name <>" rather than "name <<>>".
        if spec == '<>':
            spec = ''
        return "{} <{}>".format(shown, spec)

    def __eq__(self, other):
        if type(other) != type(self):
            return False
        mine = (self.display_name, self.username, self.domain)
        theirs = (other.display_name, other.username, other.domain)
        return mine == theirs
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse ``value`` with ``cls.parse`` and build the header string."""
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # 'decoded' is deliberately left in kwds: init() accepts arbitrary
        # keywords and simply ignores the ones it does not consume.
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        """Store the header name, parse tree and defects on the instance.

        'defects' and 'parse_tree' are required keyword arguments (a
        KeyError is raised if either is missing); other keywords are ignored.
        """
        defects = _3to2kwargs.pop('defects')
        parse_tree = _3to2kwargs.pop('parse_tree')
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        # Return a tuple so callers cannot mutate the stored list.
        return tuple(self._defects)

    def __reduce__(self):
        # Header classes are created dynamically, so pickling records the
        # class name and bases and rebuilds the class on load.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from an already-decoded value, without parsing."""
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        """Fold header according to policy.

        'policy' is a required keyword argument (a KeyError is raised if it
        is missing).

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        policy = _3to2kwargs.pop('policy')
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
                             self._parse_tree])
        return header.fold(policy=policy)
class AddressHeader(object):
    """Mixin for headers whose value is a list of addresses and groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse ``value`` into an address list; the whole value must be consumed."""
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill ``kwds`` with 'groups', 'defects', 'decoded' and 'parse_tree'.

        ``value`` is either a header string or Address/Group objects (a
        single object or an iterable of them).
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(entry.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in entry.all_mailboxes])
                for entry in address_list.addresses]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                if hasattr(item, 'addresses'):
                    # Already group-like; keep it as it is.
                    groups.append(item)
                else:
                    # Wrap a bare address in an anonymous group.
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Object input has no parse tree yet; build it from the decoded text.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple, computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        """All addresses from all groups, flattened into one cached tuple."""
        if self._addresses is None:
            flat = []
            for group in self._groups:
                flat.extend(group.addresses)
            self._addresses = tuple(flat)
        return self._addresses
class ParameterizedMIMEHeader(object):

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Parse ``value`` and record tree, decoded text, defects and params in ``kwds``."""
        parse_tree = cls.value_parser(value)
        kwds['parse_tree'] = parse_tree
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
            return
        # The MIME RFCs specify that parameter ordering is arbitrary.
        collected = {}
        for name, value in parse_tree.params:
            # Parameter names are stored lower-cased; a later duplicate
            # replaces an earlier one.
            collected[utils._sanitize(name).lower()] = utils._sanitize(value)
        kwds['params'] = collected

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Hand out a copy so callers cannot modify the stored mapping.
        return self._params.copy()
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.base_class = base_class
        self.default_class = default_class
        # Start from a private copy of the default map, or from nothing.
        self.registry = dict(_default_header_map) if use_default_map else {}

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        """Return a new header class for ``name`` (lookup is case-insensitive)."""
        specialized = self.registry.get(name.lower(), self.default_class)
        class_name = text_to_native_str('_' + specialized.__name__)
        bases = (specialized, self.base_class)
        return type(class_name, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_class = self[name]
        return header_class(name, value)
# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
    """Iterate over the parts, returning string payloads line-by-line.

    Optional decode (default False) is passed through to .get_payload().
    Parts whose payload is not a string are skipped.
    """
    for part in msg.walk():
        content = part.get_payload(decode=decode)
        if not isinstance(content, str):
            continue
        for text_line in StringIO(content):
            yield text_line


def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Iterate over the subparts with a given MIME type.

    Use `maintype' as the main MIME type to match against; this defaults to
    "text".  Optional `subtype' is the MIME subtype to match against; if
    omitted, only the main type is matched.
    """
    for part in msg.walk():
        if (part.get_content_maintype() == maintype
                and (subtype is None
                     or part.get_content_subtype() == subtype)):
            yield part


def _structure(msg, fp=None, level=0, include_default=False):
    """A handy debugging aid: print the content-type tree of msg to fp.

    fp defaults to sys.stdout; each nesting level is indented by four
    spaces.  With include_default, the default type is appended in brackets.
    """
    out = sys.stdout if fp is None else fp
    indent = ' ' * (level * 4)
    print(indent + msg.get_content_type(), end='', file=out)
    if include_default:
        print(' [%s]' % msg.get_default_type(), file=out)
    else:
        print(file=out)
    if not msg.is_multipart():
        return
    for child in msg.get_payload():
        _structure(child, out, level+1, include_default)
import absolute_import, division, unicode_literals -from future.builtins import list, range, str, zip - -__all__ = ['Message'] - -import re -import uu -import base64 -import binascii -from io import BytesIO, StringIO - -# Intrapackage imports -from future.utils import as_native_str -from future.backports.email import utils -from future.backports.email import errors -from future.backports.email._policybase import compat32 -from future.backports.email import charset as _charset -from future.backports.email._encoded_words import decode_b -Charset = _charset.Charset - -SEMISPACE = '; ' - -# Regular expression that matches `special' characters in parameters, the -# existence of which force quoting of the parameter value. -tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - - -def _splitparam(param): - # Split header parameters. BAW: this may be too simple. It isn't - # strictly RFC 2045 (section 5.1) compliant, but it catches most headers - # found in the wild. We may eventually need a full fledged parser. - # RDM: we might have a Header here; for now just stringify it. - a, sep, b = str(param).partition(';') - if not sep: - return a.strip(), None - return a.strip(), b.strip() - -def _formatparam(param, value=None, quote=True): - """Convenience function to format and return a key=value pair. - - This will quote the value if needed or if quote is true. If value is a - three tuple (charset, language, value), it will be encoded according - to RFC2231 rules. If it contains non-ascii characters it will likewise - be encoded according to RFC2231 rules, using the utf-8 charset and - a null language. - """ - if value is not None and len(value) > 0: - # A tuple is used for RFC 2231 encoded parameter values where items - # are (charset, language, value). charset is a string, not a Charset - # instance. RFC 2231 encoded values are never quoted, per RFC. 
- if isinstance(value, tuple): - # Encode as per RFC 2231 - param += '*' - value = utils.encode_rfc2231(value[2], value[0], value[1]) - return '%s=%s' % (param, value) - else: - try: - value.encode('ascii') - except UnicodeEncodeError: - param += '*' - value = utils.encode_rfc2231(value, 'utf-8', '') - return '%s=%s' % (param, value) - # BAW: Please check this. I think that if quote is set it should - # force quoting even if not necessary. - if quote or tspecials.search(value): - return '%s="%s"' % (param, utils.quote(value)) - else: - return '%s=%s' % (param, value) - else: - return param - -def _parseparam(s): - # RDM This might be a Header, so for now stringify it. - s = ';' + str(s) - plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() - plist.append(f.strip()) - s = s[end:] - return plist - - -def _unquotevalue(value): - # This is different than utils.collapse_rfc2231_value() because it doesn't - # try to convert the value to a unicode. Message.get_param() and - # Message.get_params() are both currently defined to return the tuple in - # the face of RFC 2231 parameters. - if isinstance(value, tuple): - return value[0], value[1], utils.unquote(value[2]) - else: - return utils.unquote(value) - - -class Message(object): - """Basic message object. - - A message object is defined as something that has a bunch of RFC 2822 - headers and a payload. It may optionally have an envelope header - (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a - multipart or a message/rfc822), then the payload is a list of Message - objects, otherwise it is a string. - - Message objects implement part of the `mapping' interface, which assumes - there is exactly one occurrence of the header per message. 
Some headers - do in fact appear multiple times (e.g. Received) and for those headers, - you must use the explicit API to set or get all the headers. Not all of - the mapping methods are implemented. - """ - def __init__(self, policy=compat32): - self.policy = policy - self._headers = list() - self._unixfrom = None - self._payload = None - self._charset = None - # Defaults for multipart messages - self.preamble = self.epilogue = None - self.defects = [] - # Default content type - self._default_type = 'text/plain' - - @as_native_str(encoding='utf-8') - def __str__(self): - """Return the entire formatted message as a string. - This includes the headers, body, and envelope header. - """ - return self.as_string() - - def as_string(self, unixfrom=False, maxheaderlen=0): - """Return the entire formatted message as a (unicode) string. - Optional `unixfrom' when True, means include the Unix From_ envelope - header. - - This is a convenience method and may not generate the message exactly - as you intend. For more flexibility, use the flatten() method of a - Generator instance. - """ - from future.backports.email.generator import Generator - fp = StringIO() - g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) - g.flatten(self, unixfrom=unixfrom) - return fp.getvalue() - - def is_multipart(self): - """Return True if the message consists of multiple parts.""" - return isinstance(self._payload, list) - - # - # Unix From_ line - # - def set_unixfrom(self, unixfrom): - self._unixfrom = unixfrom - - def get_unixfrom(self): - return self._unixfrom - - # - # Payload manipulation. - # - def attach(self, payload): - """Add the given payload to the current payload. - - The current payload will always be a list of objects after this method - is called. If you want to set the payload to a scalar object, use - set_payload() instead. 
- """ - if self._payload is None: - self._payload = [payload] - else: - self._payload.append(payload) - - def get_payload(self, i=None, decode=False): - """Return a reference to the payload. - - The payload will either be a list object or a string. If you mutate - the list object, you modify the message's payload in place. Optional - i returns that index into the payload. - - Optional decode is a flag indicating whether the payload should be - decoded or not, according to the Content-Transfer-Encoding header - (default is False). - - When True and the message is not a multipart, the payload will be - decoded if this header's value is `quoted-printable' or `base64'. If - some other encoding is used, or the header is missing, or if the - payload has bogus data (i.e. bogus base64 or uuencoded data), the - payload is returned as-is. - - If the message is a multipart and the decode flag is True, then None - is returned. - """ - # Here is the logic table for this code, based on the email5.0.0 code: - # i decode is_multipart result - # ------ ------ ------------ ------------------------------ - # None True True None - # i True True None - # None False True _payload (a list) - # i False True _payload element i (a Message) - # i False False error (not a list) - # i True False error (not a list) - # None False False _payload - # None True False _payload decoded (bytes) - # Note that Barry planned to factor out the 'decode' case, but that - # isn't so easy now that we handle the 8 bit data, which needs to be - # converted in both the decode and non-decode path. - if self.is_multipart(): - if decode: - return None - if i is None: - return self._payload - else: - return self._payload[i] - # For backward compatibility, Use isinstance and this error message - # instead of the more logical is_multipart test. 
- if i is not None and not isinstance(self._payload, list): - raise TypeError('Expected list, got %s' % type(self._payload)) - payload = self._payload - # cte might be a Header, so for now stringify it. - cte = str(self.get('content-transfer-encoding', '')).lower() - # payload may be bytes here. - if isinstance(payload, str): - payload = str(payload) # for Python-Future, so surrogateescape works - if utils._has_surrogates(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: - try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') - except LookupError: - payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII codepoints in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: - return payload - if cte == 'quoted-printable': - return utils._qdecode(bpayload) - elif cte == 'base64': - # XXX: this is a bit of a hack; decode_b should probably be factored - # out somewhere, but I haven't figured out where yet. - value, defects = decode_b(b''.join(bpayload.splitlines())) - for defect in defects: - self.policy.handle_defect(self, defect) - return value - elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): - in_file = BytesIO(bpayload) - out_file = BytesIO() - try: - uu.decode(in_file, out_file, quiet=True) - return out_file.getvalue() - except uu.Error: - # Some decoding problem - return bpayload - if isinstance(payload, str): - return bpayload - return payload - - def set_payload(self, payload, charset=None): - """Set the payload to the given value. - - Optional charset sets the message's default character set. See - set_charset() for details. 
- """ - self._payload = payload - if charset is not None: - self.set_charset(charset) - - def set_charset(self, charset): - """Set the charset of the payload to a given character set. - - charset can be a Charset instance, a string naming a character set, or - None. If it is a string it will be converted to a Charset instance. - If charset is None, the charset parameter will be removed from the - Content-Type field. Anything else will generate a TypeError. - - The message will be assumed to be of type text/* encoded with - charset.input_charset. It will be converted to charset.output_charset - and encoded properly, if needed, when generating the plain text - representation of the message. MIME headers (MIME-Version, - Content-Type, Content-Transfer-Encoding) will be added as needed. - """ - if charset is None: - self.del_param('charset') - self._charset = None - return - if not isinstance(charset, Charset): - charset = Charset(charset) - self._charset = charset - if 'MIME-Version' not in self: - self.add_header('MIME-Version', '1.0') - if 'Content-Type' not in self: - self.add_header('Content-Type', 'text/plain', - charset=charset.get_output_charset()) - else: - self.set_param('charset', charset.get_output_charset()) - if charset != charset.get_output_charset(): - self._payload = charset.body_encode(self._payload) - if 'Content-Transfer-Encoding' not in self: - cte = charset.get_body_encoding() - try: - cte(self) - except TypeError: - self._payload = charset.body_encode(self._payload) - self.add_header('Content-Transfer-Encoding', cte) - - def get_charset(self): - """Return the Charset instance associated with the message's payload. - """ - return self._charset - - # - # MAPPING INTERFACE (partial) - # - def __len__(self): - """Return the total number of headers, including duplicates.""" - return len(self._headers) - - def __getitem__(self, name): - """Get a header value. - - Return None if the header is missing instead of raising an exception. 
- - Note that if the header appeared multiple times, exactly which - occurrence gets returned is undefined. Use get_all() to get all - the values matching a header field name. - """ - return self.get(name) - - def __setitem__(self, name, val): - """Set the value of a header. - - Note: this does not overwrite an existing header with the same field - name. Use __delitem__() first to delete any existing headers. - """ - max_count = self.policy.header_max_count(name) - if max_count: - lname = name.lower() - found = 0 - for k, v in self._headers: - if k.lower() == lname: - found += 1 - if found >= max_count: - raise ValueError("There may be at most {} {} headers " - "in a message".format(max_count, name)) - self._headers.append(self.policy.header_store_parse(name, val)) - - def __delitem__(self, name): - """Delete all occurrences of a header, if present. - - Does not raise an exception if the header is missing. - """ - name = name.lower() - newheaders = list() - for k, v in self._headers: - if k.lower() != name: - newheaders.append((k, v)) - self._headers = newheaders - - def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] - - def __iter__(self): - for field, value in self._headers: - yield field - - def keys(self): - """Return a list of all the message's header field names. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [k for k, v in self._headers] - - def values(self): - """Return a list of all the message's header values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. 
- """ - return [self.policy.header_fetch_parse(k, v) - for k, v in self._headers] - - def items(self): - """Get all the message's header fields and values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [(k, self.policy.header_fetch_parse(k, v)) - for k, v in self._headers] - - def get(self, name, failobj=None): - """Get a header value. - - Like __getitem__() but return failobj instead of None when the field - is missing. - """ - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - return self.policy.header_fetch_parse(k, v) - return failobj - - # - # "Internal" methods (public API, but only intended for use by a parser - # or generator, not normal application code. - # - - def set_raw(self, name, value): - """Store name and value in the model without modification. - - This is an "internal" API, intended only for use by a parser. - """ - self._headers.append((name, value)) - - def raw_items(self): - """Return the (name, value) header pairs without modification. - - This is an "internal" API, intended only for use by a generator. - """ - return iter(self._headers.copy()) - - # - # Additional useful stuff - # - - def get_all(self, name, failobj=None): - """Return a list of all the values for the named field. - - These will be sorted in the order they appeared in the original - message, and may contain duplicates. Any fields deleted and - re-inserted are always appended to the header list. - - If no such fields exist, failobj is returned (defaults to None). - """ - values = [] - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - values.append(self.policy.header_fetch_parse(k, v)) - if not values: - return failobj - return values - - def add_header(self, _name, _value, **_params): - """Extended header setting. 
- - name is the header field to add. keyword arguments can be used to set - additional parameters for the header field, with underscores converted - to dashes. Normally the parameter will be added as key="value" unless - value is None, in which case only the key will be added. If a - parameter value contains non-ASCII characters it can be specified as a - three-tuple of (charset, language, value), in which case it will be - encoded according to RFC2231 rules. Otherwise it will be encoded using - the utf-8 charset and a language of ''. - - Examples: - - msg.add_header('content-disposition', 'attachment', filename='bud.gif') - msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', 'Fußballer.ppt')) - msg.add_header('content-disposition', 'attachment', - filename='Fußballer.ppt')) - """ - parts = [] - for k, v in _params.items(): - if v is None: - parts.append(k.replace('_', '-')) - else: - parts.append(_formatparam(k.replace('_', '-'), v)) - if _value is not None: - parts.insert(0, _value) - self[_name] = SEMISPACE.join(parts) - - def replace_header(self, _name, _value): - """Replace a header. - - Replace the first matching header found in the message, retaining - header order and case. If no matching header was found, a KeyError is - raised. - """ - _name = _name.lower() - for i, (k, v) in zip(range(len(self._headers)), self._headers): - if k.lower() == _name: - self._headers[i] = self.policy.header_store_parse(k, _value) - break - else: - raise KeyError(_name) - - # - # Use these three methods instead of the three above. - # - - def get_content_type(self): - """Return the message's content type. - - The returned string is coerced to lower case of the form - `maintype/subtype'. If there was no Content-Type header in the - message, the default type as given by get_default_type() will be - returned. Since according to RFC 2045, messages always have a default - type this will always return a value. 
- - RFC 2045 defines a message's default type to be text/plain unless it - appears inside a multipart/digest container, in which case it would be - message/rfc822. - """ - missing = object() - value = self.get('content-type', missing) - if value is missing: - # This should have no parameters - return self.get_default_type() - ctype = _splitparam(value)[0].lower() - # RFC 2045, section 5.2 says if its invalid, use text/plain - if ctype.count('/') != 1: - return 'text/plain' - return ctype - - def get_content_maintype(self): - """Return the message's main content type. - - This is the `maintype' part of the string returned by - get_content_type(). - """ - ctype = self.get_content_type() - return ctype.split('/')[0] - - def get_content_subtype(self): - """Returns the message's sub-content type. - - This is the `subtype' part of the string returned by - get_content_type(). - """ - ctype = self.get_content_type() - return ctype.split('/')[1] - - def get_default_type(self): - """Return the `default' content type. - - Most messages have a default content type of text/plain, except for - messages that are subparts of multipart/digest containers. Such - subparts have a default content type of message/rfc822. - """ - return self._default_type - - def set_default_type(self, ctype): - """Set the `default' content type. - - ctype should be either "text/plain" or "message/rfc822", although this - is not enforced. The default content type is not stored in the - Content-Type header. - """ - self._default_type = ctype - - def _get_params_preserve(self, failobj, header): - # Like get_params() but preserves the quoting of values. BAW: - # should this be part of the public interface? 
- missing = object() - value = self.get(header, missing) - if value is missing: - return failobj - params = [] - for p in _parseparam(value): - try: - name, val = p.split('=', 1) - name = name.strip() - val = val.strip() - except ValueError: - # Must have been a bare attribute - name = p.strip() - val = '' - params.append((name, val)) - params = utils.decode_params(params) - return params - - def get_params(self, failobj=None, header='content-type', unquote=True): - """Return the message's Content-Type parameters, as a list. - - The elements of the returned list are 2-tuples of key/value pairs, as - split on the `=' sign. The left hand side of the `=' is the key, - while the right hand side is the value. If there is no `=' sign in - the parameter the value is the empty string. The value is as - described in the get_param() method. - - Optional failobj is the object to return if there is no Content-Type - header. Optional header is the header to search instead of - Content-Type. If unquote is True, the value is unquoted. - """ - missing = object() - params = self._get_params_preserve(missing, header) - if params is missing: - return failobj - if unquote: - return [(k, _unquotevalue(v)) for k, v in params] - else: - return params - - def get_param(self, param, failobj=None, header='content-type', - unquote=True): - """Return the parameter value if found in the Content-Type header. - - Optional failobj is the object to return if there is no Content-Type - header, or the Content-Type header has no such parameter. Optional - header is the header to search instead of Content-Type. - - Parameter keys are always compared case insensitively. The return - value can either be a string, or a 3-tuple if the parameter was RFC - 2231 encoded. When it's a 3-tuple, the elements of the value are of - the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and - LANGUAGE can be None, in which case you should consider VALUE to be - encoded in the us-ascii charset. 
You can usually ignore LANGUAGE. - The parameter value (either the returned string, or the VALUE item in - the 3-tuple) is always unquoted, unless unquote is set to False. - - If your application doesn't care whether the parameter was RFC 2231 - encoded, it can turn the return value into a string as follows: - - param = msg.get_param('foo') - param = email.utils.collapse_rfc2231_value(rawparam) - - """ - if header not in self: - return failobj - for k, v in self._get_params_preserve(failobj, header): - if k.lower() == param.lower(): - if unquote: - return _unquotevalue(v) - else: - return v - return failobj - - def set_param(self, param, value, header='Content-Type', requote=True, - charset=None, language=''): - """Set a parameter in the Content-Type header. - - If the parameter already exists in the header, its value will be - replaced with the new value. - - If header is Content-Type and has not yet been defined for this - message, it will be set to "text/plain" and the new parameter and - value will be appended as per RFC 2045. - - An alternate header can specified in the header argument, and all - parameters will be quoted as necessary unless requote is False. - - If charset is specified, the parameter will be encoded according to RFC - 2231. Optional language specifies the RFC 2231 language, defaulting - to the empty string. Both charset and language should be strings. 
- """ - if not isinstance(value, tuple) and charset: - value = (charset, language, value) - - if header not in self and header.lower() == 'content-type': - ctype = 'text/plain' - else: - ctype = self.get(header) - if not self.get_param(param, header=header): - if not ctype: - ctype = _formatparam(param, value, requote) - else: - ctype = SEMISPACE.join( - [ctype, _formatparam(param, value, requote)]) - else: - ctype = '' - for old_param, old_value in self.get_params(header=header, - unquote=requote): - append_param = '' - if old_param.lower() == param.lower(): - append_param = _formatparam(param, value, requote) - else: - append_param = _formatparam(old_param, old_value, requote) - if not ctype: - ctype = append_param - else: - ctype = SEMISPACE.join([ctype, append_param]) - if ctype != self.get(header): - del self[header] - self[header] = ctype - - def del_param(self, param, header='content-type', requote=True): - """Remove the given parameter completely from the Content-Type header. - - The header will be re-written in place without the parameter or its - value. All values will be quoted as necessary unless requote is - False. Optional header specifies an alternative to the Content-Type - header. - """ - if header not in self: - return - new_ctype = '' - for p, v in self.get_params(header=header, unquote=requote): - if p.lower() != param.lower(): - if not new_ctype: - new_ctype = _formatparam(p, v, requote) - else: - new_ctype = SEMISPACE.join([new_ctype, - _formatparam(p, v, requote)]) - if new_ctype != self.get(header): - del self[header] - self[header] = new_ctype - - def set_type(self, type, header='Content-Type', requote=True): - """Set the main type and subtype for the Content-Type header. - - type must be a string in the form "maintype/subtype", otherwise a - ValueError is raised. - - This method replaces the Content-Type header, keeping all the - parameters in place. If requote is False, this leaves the existing - header's quoting as is. 
Otherwise, the parameters will be quoted (the - default). - - An alternative header can be specified in the header argument. When - the Content-Type header is set, we'll always also add a MIME-Version - header. - """ - # BAW: should we be strict? - if not type.count('/') == 1: - raise ValueError - # Set the Content-Type, you get a MIME-Version - if header.lower() == 'content-type': - del self['mime-version'] - self['MIME-Version'] = '1.0' - if header not in self: - self[header] = type - return - params = self.get_params(header=header, unquote=requote) - del self[header] - self[header] = type - # Skip the first param; it's the old type. - for p, v in params[1:]: - self.set_param(p, v, header, requote) - - def get_filename(self, failobj=None): - """Return the filename associated with the payload if present. - - The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. If that header is missing - the `filename' parameter, this method falls back to looking for the - `name' parameter. - """ - missing = object() - filename = self.get_param('filename', missing, 'content-disposition') - if filename is missing: - filename = self.get_param('name', missing, 'content-type') - if filename is missing: - return failobj - return utils.collapse_rfc2231_value(filename).strip() - - def get_boundary(self, failobj=None): - """Return the boundary associated with the payload if present. - - The boundary is extracted from the Content-Type header's `boundary' - parameter, and it is unquoted. - """ - missing = object() - boundary = self.get_param('boundary', missing) - if boundary is missing: - return failobj - # RFC 2046 says that boundaries may begin but not end in w/s - return utils.collapse_rfc2231_value(boundary).rstrip() - - def set_boundary(self, boundary): - """Set the boundary parameter in Content-Type to 'boundary'. 
- - This is subtly different than deleting the Content-Type header and - adding a new one with a new boundary parameter via add_header(). The - main difference is that using the set_boundary() method preserves the - order of the Content-Type header in the original message. - - HeaderParseError is raised if the message has no Content-Type header. - """ - missing = object() - params = self._get_params_preserve(missing, 'content-type') - if params is missing: - # There was no Content-Type header, and we don't know what type - # to set it to, so raise an exception. - raise errors.HeaderParseError('No Content-Type header found') - newparams = list() - foundp = False - for pk, pv in params: - if pk.lower() == 'boundary': - newparams.append(('boundary', '"%s"' % boundary)) - foundp = True - else: - newparams.append((pk, pv)) - if not foundp: - # The original Content-Type header had no boundary attribute. - # Tack one on the end. BAW: should we raise an exception - # instead??? - newparams.append(('boundary', '"%s"' % boundary)) - # Replace the existing Content-Type header with the new value - newheaders = list() - for h, v in self._headers: - if h.lower() == 'content-type': - parts = list() - for k, v in newparams: - if v == '': - parts.append(k) - else: - parts.append('%s=%s' % (k, v)) - val = SEMISPACE.join(parts) - newheaders.append(self.policy.header_store_parse(h, val)) - - else: - newheaders.append((h, v)) - self._headers = newheaders - - def get_content_charset(self, failobj=None): - """Return the charset parameter of the Content-Type header. - - The returned string is always coerced to lower case. If there is no - Content-Type header, or if that header has no charset parameter, - failobj is returned. - """ - missing = object() - charset = self.get_param('charset', missing) - if charset is missing: - return failobj - if isinstance(charset, tuple): - # RFC 2231 encoded, so decode it, and it better end up as ascii. 
- pcharset = charset[0] or 'us-ascii' - try: - # LookupError will be raised if the charset isn't known to - # Python. UnicodeError will be raised if the encoded text - # contains a character not in the charset. - as_bytes = charset[2].encode('raw-unicode-escape') - charset = str(as_bytes, pcharset) - except (LookupError, UnicodeError): - charset = charset[2] - # charset characters must be in us-ascii range - try: - charset.encode('us-ascii') - except UnicodeError: - return failobj - # RFC 2046, $4.1.2 says charsets are not case sensitive - return charset.lower() - - def get_charsets(self, failobj=None): - """Return a list containing the charset(s) used in this message. - - The returned list of items describes the Content-Type headers' - charset parameter for this message and all the subparts in its - payload. - - Each item will either be a string (the value of the charset parameter - in the Content-Type header of that part) or the value of the - 'failobj' parameter (defaults to None), if the part does not have a - main MIME type of "text", or the charset is not defined. - - The list will contain one string for each part of the message, plus - one for the container message (i.e. self), so that a non-multipart - message will still return a list of length 1. - """ - return [part.get_content_charset(failobj) for part in self.walk()] - - # I.e. def walk(self): ... 
- from future.backports.email.iterators import walk diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/__init__.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/__init__.py +++ /dev/null diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/application.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/application.py deleted file mode 100644 index 5cbfb17..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/application.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Keith Dart -# Contact: email-sig@python.org - -"""Class representing application/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - -__all__ = ["MIMEApplication"] - - -class MIMEApplication(MIMENonMultipart): - """Class for generating application/* MIME documents.""" - - def __init__(self, _data, _subtype='octet-stream', - _encoder=encoders.encode_base64, **_params): - """Create an application/* type MIME document. - - _data is a string containing the raw application data. - - _subtype is the MIME content type subtype, defaulting to - 'octet-stream'. - - _encoder is a function which will perform the actual encoding for - transport of the application data, defaulting to base64 encoding. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. 
- """ - if _subtype is None: - raise TypeError('Invalid application MIME subtype') - MIMENonMultipart.__init__(self, 'application', _subtype, **_params) - self.set_payload(_data) - _encoder(self) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/audio.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/audio.py deleted file mode 100644 index 4989c11..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/audio.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Anthony Baxter -# Contact: email-sig@python.org - -"""Class representing audio/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEAudio'] - -import sndhdr - -from io import BytesIO -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - -class MIMEAudio(MIMENonMultipart): - """Class for generating audio/* MIME documents.""" - - def __init__(self, _audiodata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an audio/* type MIME document. - - _audiodata is a string containing the raw audio data. 
If this data - can be decoded by the standard Python `sndhdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific audio subtype via the - _subtype parameter. If _subtype is not given, and no subtype can be - guessed, a TypeError is raised. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = _whatsnd(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') - MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) - self.set_payload(_audiodata) - _encoder(self) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/base.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/base.py deleted file mode 100644 index e77f3ca..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/base.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME specializations.""" -from __future__ import absolute_import, division, unicode_literals -from future.backports.email import message - -__all__ = ['MIMEBase'] - - -class MIMEBase(message.Message): - """Base class for MIME specializations.""" - - def __init__(self, _maintype, _subtype, **_params): - """This constructor adds a Content-Type: and a MIME-Version: header. 
- - The Content-Type: header is taken from the _maintype and _subtype - arguments. Additional parameters for this header are taken from the - keyword arguments. - """ - message.Message.__init__(self) - ctype = '%s/%s' % (_maintype, _subtype) - self.add_header('Content-Type', ctype, **_params) - self['MIME-Version'] = '1.0' diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/image.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/image.py deleted file mode 100644 index a036024..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/image.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing image/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEImage'] - -import imghdr - -from future.backports.email import encoders -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEImage(MIMENonMultipart): - """Class for generating image/* type MIME documents.""" - - def __init__(self, _imagedata, _subtype=None, - _encoder=encoders.encode_base64, **_params): - """Create an image/* type MIME document. - - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. 
The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) - if _subtype is None: - raise TypeError('Could not guess image MIME subtype') - MIMENonMultipart.__init__(self, 'image', _subtype, **_params) - self.set_payload(_imagedata) - _encoder(self) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/message.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/message.py deleted file mode 100644 index 7f92075..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/message.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing message/* MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEMessage'] - -from future.backports.email import message -from future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEMessage(MIMENonMultipart): - """Class representing message/* MIME documents.""" - - def __init__(self, _msg, _subtype='rfc822'): - """Create a message/* type MIME document. - - _msg is a message object and must be an instance of Message, or a - derived class of Message, otherwise a TypeError is raised. - - Optional _subtype defines the subtype of the contained message. The - default is "rfc822" (this is defined by the MIME standard, even though - the term "rfc822" is technically outdated by RFC 2822). - """ - MIMENonMultipart.__init__(self, 'message', _subtype) - if not isinstance(_msg, message.Message): - raise TypeError('Argument is not an instance of Message') - # It's convenient to use this base class method. 
We need to do it - # this way or we'll get an exception - message.Message.attach(self, _msg) - # And be sure our default type is set correctly - self.set_default_type('message/rfc822') diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/multipart.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/multipart.py deleted file mode 100644 index 6d7ed3d..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/multipart.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME multipart/* type messages.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEMultipart'] - -from future.backports.email.mime.base import MIMEBase - - -class MIMEMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def __init__(self, _subtype='mixed', boundary=None, _subparts=None, - **_params): - """Creates a multipart/* type message. - - By default, creates a multipart/mixed message, with proper - Content-Type and MIME-Version headers. - - _subtype is the subtype of the multipart content type, defaulting to - `mixed'. - - boundary is the multipart boundary string. By default it is - calculated as needed. - - _subparts is a sequence of initial subparts for the payload. It - must be an iterable object, such as a list. You can always - attach new subparts to the message by using the attach() method. - - Additional parameters for the Content-Type header are taken from the - keyword arguments (or passed into the _params argument). - """ - MIMEBase.__init__(self, 'multipart', _subtype, **_params) - - # Initialise _payload to an empty list as the Message superclass's - # implementation of is_multipart assumes that _payload is a list for - # multipart messages. 
- self._payload = [] - - if _subparts: - for p in _subparts: - self.attach(p) - if boundary: - self.set_boundary(boundary) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/nonmultipart.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/nonmultipart.py deleted file mode 100644 index 08c37c3..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/nonmultipart.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2002-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME type messages that are not multipart.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMENonMultipart'] - -from future.backports.email import errors -from future.backports.email.mime.base import MIMEBase - - -class MIMENonMultipart(MIMEBase): - """Base class for MIME multipart/* type messages.""" - - def attach(self, payload): - # The public API prohibits attaching multiple subparts to MIMEBase - # derived subtypes since none of them are, by definition, of content - # type multipart/* - raise errors.MultipartConversionError( - 'Cannot attach additional subparts to non-multipart/*') diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/text.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/text.py deleted file mode 100644 index 6269f4a..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/mime/text.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing text/* type MIME documents.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['MIMEText'] - -from future.backports.email.encoders import encode_7or8bit -from 
future.backports.email.mime.nonmultipart import MIMENonMultipart - - -class MIMEText(MIMENonMultipart): - """Class for generating text/* type MIME documents.""" - - def __init__(self, _text, _subtype='plain', _charset=None): - """Create a text/* type MIME document. - - _text is the string for this message object. - - _subtype is the MIME sub content type, defaulting to "plain". - - _charset is the character set parameter added to the Content-Type - header. This defaults to "us-ascii". Note that as a side-effect, the - Content-Transfer-Encoding header will also be set. - """ - - # If no _charset was specified, check to see if there are non-ascii - # characters present. If not, use 'us-ascii', otherwise use utf-8. - # XXX: This can be removed once #7304 is fixed. - if _charset is None: - try: - _text.encode('us-ascii') - _charset = 'us-ascii' - except UnicodeEncodeError: - _charset = 'utf-8' - - MIMENonMultipart.__init__(self, 'text', _subtype, - **{'charset': _charset}) - - self.set_payload(_text, _charset) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/parser.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/parser.py deleted file mode 100644 index df1c6e2..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/parser.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (C) 2001-2007 Python Software Foundation -# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter -# Contact: email-sig@python.org - -"""A parser of RFC 2822 and MIME email messages.""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import - -__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] - -import warnings -from io import StringIO, TextIOWrapper - -from future.backports.email.feedparser import FeedParser, BytesFeedParser -from future.backports.email.message import Message -from future.backports.email._policybase import compat32 - - -class Parser(object): - 
def __init__(self, _class=Message, **_3to2kwargs): - """Parser of RFC 2822 and MIME email messages. - - Creates an in-memory object tree representing the email message, which - can then be manipulated and turned over to a Generator to return the - textual representation of the message. - - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The - header block is terminated either by the end of the string or by a - blank line. - - _class is the class to instantiate for new message objects when they - must be created. This class must have a constructor that can take - zero arguments. Default is Message.Message. - - The policy keyword specifies a policy object that controls a number of - aspects of the parser's operation. The default policy maintains - backward compatibility. - - """ - if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] - else: policy = compat32 - self._class = _class - self.policy = policy - - def parse(self, fp, headersonly=False): - """Create a message structure from the data in a file. - - Reads all the data from the file and returns the root of the message - structure. Optional headersonly is a flag specifying whether to stop - parsing after reading the headers or not. The default is False, - meaning it parses the entire contents of the file. - """ - feedparser = FeedParser(self._class, policy=self.policy) - if headersonly: - feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break - feedparser.feed(data) - return feedparser.close() - - def parsestr(self, text, headersonly=False): - """Create a message structure from a string. - - Returns the root of the message structure. Optional headersonly is a - flag specifying whether to stop parsing after reading the headers or - not. The default is False, meaning it parses the entire contents of - the file. 
- """ - return self.parse(StringIO(text), headersonly=headersonly) - - - -class HeaderParser(Parser): - def parse(self, fp, headersonly=True): - return Parser.parse(self, fp, True) - - def parsestr(self, text, headersonly=True): - return Parser.parsestr(self, text, True) - - -class BytesParser(object): - - def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. - - Creates an in-memory object tree representing the email message, which - can then be manipulated and turned over to a Generator to return the - textual representation of the message. - - The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceeded by a `Unix-from' header. The - header block is terminated either by the end of the input or by a - blank line. - - _class is the class to instantiate for new message objects when they - must be created. This class must have a constructor that can take - zero arguments. Default is Message.Message. - """ - self.parser = Parser(*args, **kw) - - def parse(self, fp, headersonly=False): - """Create a message structure from the data in a binary file. - - Reads all the data from the file and returns the root of the message - structure. Optional headersonly is a flag specifying whether to stop - parsing after reading the headers or not. The default is False, - meaning it parses the entire contents of the file. - """ - fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') - with fp: - return self.parser.parse(fp, headersonly) - - - def parsebytes(self, text, headersonly=False): - """Create a message structure from a byte string. - - Returns the root of the message structure. Optional headersonly is a - flag specifying whether to stop parsing after reading the headers or - not. The default is False, meaning it parses the entire contents of - the file. 
- """ - text = text.decode('ASCII', errors='surrogateescape') - return self.parser.parsestr(text, headersonly) - - -class BytesHeaderParser(BytesParser): - def parse(self, fp, headersonly=True): - return BytesParser.parse(self, fp, headersonly=True) - - def parsebytes(self, text, headersonly=True): - return BytesParser.parsebytes(self, text, headersonly=True) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/policy.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/policy.py deleted file mode 100644 index 2f609a2..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/policy.py +++ /dev/null @@ -1,193 +0,0 @@ -"""This will be the home for the policy that hooks in the new -code that adds all the email6 features. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import super - -from future.standard_library.email._policybase import (Policy, Compat32, - compat32, _extend_docstrings) -from future.standard_library.email.utils import _has_surrogates -from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry - -__all__ = [ - 'Compat32', - 'compat32', - 'Policy', - 'EmailPolicy', - 'default', - 'strict', - 'SMTP', - 'HTTP', - ] - -@_extend_docstrings -class EmailPolicy(Policy): - - """+ - PROVISIONAL - - The API extensions enabled by this policy are currently provisional. - Refer to the documentation for details. - - This policy adds new header parsing and folding algorithms. Instead of - simple strings, headers are custom objects with custom attributes - depending on the type of the field. The folding algorithm fully - implements RFCs 2047 and 5322. 
- - In addition to the settable attributes listed above that apply to - all Policies, this policy adds the following additional attributes: - - refold_source -- if the value for a header in the Message object - came from the parsing of some source, this attribute - indicates whether or not a generator should refold - that value when transforming the message back into - stream form. The possible values are: - - none -- all source values use original folding - long -- source values that have any line that is - longer than max_line_length will be - refolded - all -- all values are refolded. - - The default is 'long'. - - header_factory -- a callable that takes two arguments, 'name' and - 'value', where 'name' is a header field name and - 'value' is an unfolded header field value, and - returns a string-like object that represents that - header. A default header_factory is provided that - understands some of the RFC5322 header field types. - (Currently address fields and date fields have - special treatment, while all other fields are - treated as unstructured. This list will be - completed before the extension is marked stable.) - """ - - refold_source = 'long' - header_factory = HeaderRegistry() - - def __init__(self, **kw): - # Ensure that each new instance gets a unique header factory - # (as opposed to clones, which share the factory). - if 'header_factory' not in kw: - object.__setattr__(self, 'header_factory', HeaderRegistry()) - super().__init__(**kw) - - def header_max_count(self, name): - """+ - The implementation for this class returns the max_count attribute from - the specialized header class that would be used to construct a header - of type 'name'. - """ - return self.header_factory[name].max_count - - # The logic of the next three methods is chosen such that it is possible to - # switch a Message object between a Compat32 policy and a policy derived - # from this class and have the results stay consistent. 
This allows a - # Message object constructed with this policy to be passed to a library - # that only handles Compat32 objects, or to receive such an object and - # convert it to use the newer style by just changing its policy. It is - # also chosen because it postpones the relatively expensive full rfc5322 - # parse until as late as possible when parsing from source, since in many - # applications only a few headers will actually be inspected. - - def header_source_parse(self, sourcelines): - """+ - The name is parsed as everything up to the ':' and returned unmodified. - The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and - stripping any trailing carriage return or linefeed characters. (This - is the same as Compat32). - - """ - name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) - return (name, value.rstrip('\r\n')) - - def header_store_parse(self, name, value): - """+ - The name is returned unchanged. If the input value has a 'name' - attribute and it matches the name ignoring case, the value is returned - unchanged. Otherwise the name and value are passed to header_factory - method, and the resulting custom header object is returned as the - value. In this case a ValueError is raised if the input value contains - CR or LF characters. - - """ - if hasattr(value, 'name') and value.name.lower() == name.lower(): - return (name, value) - if isinstance(value, str) and len(value.splitlines())>1: - raise ValueError("Header values may not contain linefeed " - "or carriage return characters") - return (name, self.header_factory(name, value)) - - def header_fetch_parse(self, name, value): - """+ - If the value has a 'name' attribute, it is returned to unmodified. - Otherwise the name and the value with any linesep characters removed - are passed to the header_factory method, and the resulting custom - header object is returned. 
Any surrogateescaped bytes get turned - into the unicode unknown-character glyph. - - """ - if hasattr(value, 'name'): - return value - return self.header_factory(name, ''.join(value.splitlines())) - - def fold(self, name, value): - """+ - Header folding is controlled by the refold_source policy setting. A - value is considered to be a 'source value' if and only if it does not - have a 'name' attribute (having a 'name' attribute means it is a header - object of some sort). If a source value needs to be refolded according - to the policy, it is converted into a custom header object by passing - the name and the value with any linesep characters removed to the - header_factory method. Folding of a custom header object is done by - calling its fold method with the current policy. - - Source values are split into lines using splitlines. If the value is - not to be refolded, the lines are rejoined using the linesep from the - policy and returned. The exception is lines containing non-ascii - binary data. In that case the value is refolded regardless of the - refold_source setting, which causes the binary data to be CTE encoded - using the unknown-8bit charset. - - """ - return self._fold(name, value, refold_binary=True) - - def fold_binary(self, name, value): - """+ - The same as fold if cte_type is 7bit, except that the returned value is - bytes. - - If cte_type is 8bit, non-ASCII binary data is converted back into - bytes. Headers with binary data are not refolded, regardless of the - refold_header setting, since there is no way to know whether the binary - data consists of single byte characters or multibyte characters. 
- - """ - folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') - return folded.encode('ascii', 'surrogateescape') - - def _fold(self, name, value, refold_binary=False): - if hasattr(value, 'name'): - return value.fold(policy=self) - maxlen = self.max_line_length if self.max_line_length else float('inf') - lines = value.splitlines() - refold = (self.refold_source == 'all' or - self.refold_source == 'long' and - (lines and len(lines[0])+len(name)+2 > maxlen or - any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): - return self.header_factory(name, ''.join(lines)).fold(policy=self) - return name + ': ' + self.linesep.join(lines) + self.linesep - - -default = EmailPolicy() -# Make the default policy use the class default header_factory -del default.header_factory -strict = default.clone(raise_on_defect=True) -SMTP = default.clone(linesep='\r\n') -HTTP = default.clone(linesep='\r\n', max_line_length=None) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/quoprimime.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/quoprimime.py deleted file mode 100644 index b69d158..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/quoprimime.py +++ /dev/null @@ -1,326 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Ben Gertzfield -# Contact: email-sig@python.org - -"""Quoted-printable content transfer encoding per RFCs 2045-2047. - -This module handles the content transfer encoding method defined in RFC 2045 -to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to -safely encode text that is in a character set similar to the 7-bit US ASCII -character set, but that includes some 8-bit characters that are normally not -allowed in email bodies or headers. - -Quoted-printable is very space-inefficient for encoding binary files; use the -email.base64mime module for that instead. 
- -This module provides an interface to encode and decode both headers and bodies -with quoted-printable encoding. - -RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names -in To:/From:/Cc: etc. fields, as well as Subject: lines. - -This module does not do the line wrapping or end-of-line character -conversion necessary for proper internationalized headers; it only -does dumb encoding and decoding. To deal with the various line -wrapping issues, use the email.header module. -""" -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future.builtins import bytes, chr, dict, int, range, super - -__all__ = [ - 'body_decode', - 'body_encode', - 'body_length', - 'decode', - 'decodestring', - 'header_decode', - 'header_encode', - 'header_length', - 'quote', - 'unquote', - ] - -import re -import io - -from string import ascii_letters, digits, hexdigits - -CRLF = '\r\n' -NL = '\n' -EMPTYSTRING = '' - -# Build a mapping of octets to the expansion of that octet. Since we're only -# going to have 256 of these things, this isn't terribly inefficient -# space-wise. Remember that headers and bodies have different sets of safe -# characters. Initialize both maps with the full expansion, and then override -# the safe bytes with the more compact form. -_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256)) -_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy() - -# Safe header bytes which need no encoding. -for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')): - _QUOPRI_HEADER_MAP[c] = chr(c) -# Headers have one other special encoding; spaces become underscores. -_QUOPRI_HEADER_MAP[ord(' ')] = '_' - -# Safe body bytes which need no encoding. 
-for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>' - b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' - b'abcdefghijklmnopqrstuvwxyz{|}~\t'): - _QUOPRI_BODY_MAP[c] = chr(c) - - - -# Helpers -def header_check(octet): - """Return True if the octet should be escaped with header quopri.""" - return chr(octet) != _QUOPRI_HEADER_MAP[octet] - - -def body_check(octet): - """Return True if the octet should be escaped with body quopri.""" - return chr(octet) != _QUOPRI_BODY_MAP[octet] - - -def header_length(bytearray): - """Return a header quoted-printable encoding length. - - Note that this does not include any RFC 2047 chrome added by - `header_encode()`. - - :param bytearray: An array of bytes (a.k.a. octets). - :return: The length in bytes of the byte array when it is encoded with - quoted-printable for headers. - """ - return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray) - - -def body_length(bytearray): - """Return a body quoted-printable encoding length. - - :param bytearray: An array of bytes (a.k.a. octets). - :return: The length in bytes of the byte array when it is encoded with - quoted-printable for bodies. - """ - return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray) - - -def _max_append(L, s, maxlen, extra=''): - if not isinstance(s, str): - s = chr(s) - if not L: - L.append(s.lstrip()) - elif len(L[-1]) + len(s) <= maxlen: - L[-1] += extra + s - else: - L.append(s.lstrip()) - - -def unquote(s): - """Turn a string in the form =AB to the ASCII character with value 0xab""" - return chr(int(s[1:3], 16)) - - -def quote(c): - return '=%02X' % ord(c) - - - -def header_encode(header_bytes, charset='iso-8859-1'): - """Encode a single header line with quoted-printable (like) encoding. - - Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but - used specifically for email header fields to allow charsets with mostly 7 - bit characters (and some 8 bit) to remain more or less readable in non-RFC - 2045 aware mail clients. 
- - charset names the character set to use in the RFC 2046 header. It - defaults to iso-8859-1. - """ - # Return empty headers as an empty string. - if not header_bytes: - return '' - # Iterate over every byte, encoding if necessary. - encoded = [] - for octet in header_bytes: - encoded.append(_QUOPRI_HEADER_MAP[octet]) - # Now add the RFC chrome to each encoded chunk and glue the chunks - # together. - return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) - - -class _body_accumulator(io.StringIO): - - def __init__(self, maxlinelen, eol, *args, **kw): - super().__init__(*args, **kw) - self.eol = eol - self.maxlinelen = self.room = maxlinelen - - def write_str(self, s): - """Add string s to the accumulated body.""" - self.write(s) - self.room -= len(s) - - def newline(self): - """Write eol, then start new line.""" - self.write_str(self.eol) - self.room = self.maxlinelen - - def write_soft_break(self): - """Write a soft break, then start a new line.""" - self.write_str('=') - self.newline() - - def write_wrapped(self, s, extra_room=0): - """Add a soft line break if needed, then write s.""" - if self.room < len(s) + extra_room: - self.write_soft_break() - self.write_str(s) - - def write_char(self, c, is_last_char): - if not is_last_char: - # Another character follows on this line, so we must leave - # extra room, either for it or a soft break, and whitespace - # need not be quoted. - self.write_wrapped(c, extra_room=1) - elif c not in ' \t': - # For this and remaining cases, no more characters follow, - # so there is no need to reserve extra room (since a hard - # break will immediately follow). - self.write_wrapped(c) - elif self.room >= 3: - # It's a whitespace character at end-of-line, and we have room - # for the three-character quoted encoding. - self.write(quote(c)) - elif self.room == 2: - # There's room for the whitespace character and a soft break. - self.write(c) - self.write_soft_break() - else: - # There's room only for a soft break. 
The quoted whitespace - # will be the only content on the subsequent line. - self.write_soft_break() - self.write(quote(c)) - - -def body_encode(body, maxlinelen=76, eol=NL): - """Encode with quoted-printable, wrapping at maxlinelen characters. - - Each line of encoded text will end with eol, which defaults to "\\n". Set - this to "\\r\\n" if you will be using the result of this function directly - in an email. - - Each line will be wrapped at, at most, maxlinelen characters before the - eol string (maxlinelen defaults to 76 characters, the maximum value - permitted by RFC 2045). Long lines will have the 'soft line break' - quoted-printable character "=" appended to them, so the decoded text will - be identical to the original text. - - The minimum maxlinelen is 4 to have room for a quoted character ("=XX") - followed by a soft line break. Smaller values will generate a - ValueError. - - """ - - if maxlinelen < 4: - raise ValueError("maxlinelen must be at least 4") - if not body: - return body - - # The last line may or may not end in eol, but all other lines do. - last_has_eol = (body[-1] in '\r\n') - - # This accumulator will make it easier to build the encoded body. - encoded_body = _body_accumulator(maxlinelen, eol) - - lines = body.splitlines() - last_line_no = len(lines) - 1 - for line_no, line in enumerate(lines): - last_char_index = len(line) - 1 - for i, c in enumerate(line): - if body_check(ord(c)): - c = quote(c) - encoded_body.write_char(c, i==last_char_index) - # Add an eol if input line had eol. All input lines have eol except - # possibly the last one. - if line_no < last_line_no or last_has_eol: - encoded_body.newline() - - return encoded_body.getvalue() - - - -# BAW: I'm not sure if the intent was for the signature of this function to be -# the same as base64MIME.decode() or not... -def decode(encoded, eol=NL): - """Decode a quoted-printable string. - - Lines are separated with eol, which defaults to \\n. 
- """ - if not encoded: - return encoded - # BAW: see comment in encode() above. Again, we're building up the - # decoded string with string concatenation, which could be done much more - # efficiently. - decoded = '' - - for line in encoded.splitlines(): - line = line.rstrip() - if not line: - decoded += eol - continue - - i = 0 - n = len(line) - while i < n: - c = line[i] - if c != '=': - decoded += c - i += 1 - # Otherwise, c == "=". Are we at the end of the line? If so, add - # a soft line break. - elif i+1 == n: - i += 1 - continue - # Decode if in form =AB - elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: - decoded += unquote(line[i:i+3]) - i += 3 - # Otherwise, not in form =AB, pass literally - else: - decoded += c - i += 1 - - if i == n: - decoded += eol - # Special case if original string did not end with eol - if encoded[-1] not in '\r\n' and decoded.endswith(eol): - decoded = decoded[:-1] - return decoded - - -# For convenience and backwards compatibility w/ standard base64 module -body_decode = decode -decodestring = decode - - - -def _unquote_match(match): - """Turn a match in the form =AB to the ASCII character with value 0xab""" - s = match.group(0) - return unquote(s) - - -# Header decoding is done a bit differently -def header_decode(s): - """Decode a string encoded with RFC 2045 MIME header `Q' encoding. - - This function does not parse a full MIME header value encoded with - quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use - the high level email.header class for that functionality. 
- """ - s = s.replace('_', ' ') - return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII) diff --git a/Python/Dependencies/future-0.18.2/src/future/backports/email/utils.py b/Python/Dependencies/future-0.18.2/src/future/backports/email/utils.py deleted file mode 100644 index 4abebf7..0000000 --- a/Python/Dependencies/future-0.18.2/src/future/backports/email/utils.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (C) 2001-2010 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Miscellaneous utilities.""" - -from __future__ import unicode_literals -from __future__ import division -from __future__ import absolute_import -from future import utils -from future.builtins import bytes, int, str - -__all__ = [ - 'collapse_rfc2231_value', - 'decode_params', - 'decode_rfc2231', - 'encode_rfc2231', - 'formataddr', - 'formatdate', - 'format_datetime', - 'getaddresses', - 'make_msgid', - 'mktime_tz', - 'parseaddr', - 'parsedate', - 'parsedate_tz', - 'parsedate_to_datetime', - 'unquote', - ] - -import os -import re -if utils.PY2: - re.ASCII = 0 -import time -import base64 -import random -import socket -from future.backports import datetime -from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote -import warnings -from io import StringIO - -from future.backports.email._parseaddr import quote -from future.backports.email._parseaddr import AddressList as _AddressList -from future.backports.email._parseaddr import mktime_tz - -from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz - -from quopri import decodestring as _qdecode - -# Intrapackage imports -from future.backports.email.encoders import _bencode, _qencode -from future.backports.email.charset import Charset - -COMMASPACE = ', ' -EMPTYSTRING = '' -UEMPTYSTRING = '' -CRLF = '\r\n' -TICK = "'" - -specialsre = re.compile(r'[][\\()<>@,:;".]') -escapesre = re.compile(r'[\\"]') - -# How to figure out if we are processing strings 
that come from a byte -# source with undecodable characters. -_has_surrogates = re.compile( - '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search - -# How to deal with a string containing bytes before handing it to the -# application through the 'normal' interface. -def _sanitize(string): - # Turn any escaped bytes into unicode 'unknown' char. - original_bytes = string.encode('ascii', 'surrogateescape') - return original_bytes.decode('ascii', 'replace') - - -# Helpers - -def formataddr(pair, charset='utf-8'): - """The inverse of parseaddr(), this takes a 2-tuple of the form - (realname, email_address) and returns the string value suitable - for an RFC 2822 From, To or Cc header. - - If the first element of pair is false, then the second element is - returned unmodified. - - Optional charset if given is the character set that is used to encode - realname in case realname is not ASCII safe. Can be an instance of str or - a Charset-like object which has a header_encode method. Default is - 'utf-8'. - """ - name, address = pair - # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. - address.encode('ascii') - if name: - try: - name.encode('ascii') - except UnicodeEncodeError: - if isinstance(charset, str): - charset = Charset(charset) - encoded_name = charset.header_encode(name) - return "%s <%s>" % (encoded_name, address) - else: - quotes = '' - if specialsre.search(name): - quotes = '"' - name = escapesre.sub(r'\\\g<0>', name) - return '%s%s%s <%s>' % (quotes, name, quotes, address) - return address - - - -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(fieldvalues) - a = _AddressList(all) - return a.addresslist - - - -ecre = re.compile(r''' - =\? # literal =? - (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P<atom>.*?) 
# non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) - - -def _format_timetuple_and_zone(timetuple, zone): - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], - timetuple[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], - timetuple[0], timetuple[3], timetuple[4], timetuple[5], - zone) - -def formatdate(timeval=None, localtime=False, usegmt=False): - """Returns a date string as specified by RFC 2822, e.g.: - - Fri, 09 Nov 2001 01:08:47 -0000 - - Optional timeval if given is a floating point time value as accepted by - gmtime() and localtime(), otherwise the current time is used. - - Optional localtime is a flag that when True, interprets timeval, and - returns a date relative to the local timezone instead of UTC, properly - taking daylight savings time into account. - - Optional argument usegmt means that the timezone is written out as - an ascii string, not numeric one (so "GMT" instead of "+0000"). This - is needed for HTTP, and is only used when localtime==False. - """ - # Note: we cannot use strftime() because that honors the locale and RFC - # 2822 requires that day and month names be the English abbreviations. - if timeval is None: - timeval = time.time() - if localtime: - now = time.localtime(timeval) - # Calculate timezone offset, based on whether the local zone has - # daylight savings time, and whether DST is in effect. - if time.daylight and now[-1]: - offset = time.altzone - else: - offset = time.timezone - hours, minutes = divmod(abs(offset), 3600) - # Remember offset is in seconds west of UTC, but the timezone is in - # minutes east of UTC, so the signs differ. 
- if offset > 0: - sign = '-' - else: - sign = '+' - zone = '%s%02d%02d' % (sign, hours, minutes // 60) - else: - now = time.gmtime(timeval) - # Timezone offset is always -0000 - if usegmt: - zone = 'GMT' - else: - zone = '-0000' - return _format_timetuple_and_zone(now, zone) - -def format_datetime(dt, usegmt=False): - """Turn a datetime into a date string as specified in RFC 2822. - - If usegmt is True, dt must be an aware datetime with an offset of zero. In - this case 'GMT' will be rendered instead of the normal +0000 required by - RFC2822. This is to support HTTP headers involving date stamps. - """ - now = dt.timetuple() - if usegmt: - if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: - raise ValueError("usegmt option requires a UTC datetime") - zone = 'GMT' - elif dt.tzinfo is None: - zone = '-0000' - else: - zone = dt.strftime("%z") - return _format_timetuple_and_zone(now, zone) - - -def make_msgid(idstring=None, domain=None): - """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: - - <20020201195627.33539.96671@nightshade.la.mastaler.com> - - Optional idstring if given is a string used to strengthen the - uniqueness of the message id. Optional domain if given provides the - portion of the message id after the '@'. It defaults to the locally - defined hostname. - """ - timeval = time.time() - utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) - pid = os.getpid() - randint = random.randrange(100000) - if idstring is None: - idstring = '' - else: - idstring = '.' 
+ idstring - if domain is None: - domain = socket.getfqdn() - msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) - return msgid - - -def parsedate_to_datetime(data): - _3to2list = list(_parsedate_tz(data)) - dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:] - if tz is None: - return datetime.datetime(*dtuple[:6]) - return datetime.datetime(*dtuple[:6], - tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) - - -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' - return addrs[0] - - -# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. -def unquote(str): - """Remove quotes from a string.""" - if len(str) > 1: - if str.startswith('"') and str.endswith('"'): - return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') - if str.startswith('<') and str.endswith('>'): - return str[1:-1] - return str - - - -# RFC2231-related functions - parameter encoding and decoding -def decode_rfc2231(s): - """Decode string according to RFC 2231""" - parts = s.split(TICK, 2) - if len(parts) <= 2: - return None, None, s - return parts - - -def encode_rfc2231(s, charset=None, language=None): - """Encode string according to RFC 2231. - - If neither charset nor language is given, then s is returned as-is. If - charset is given but not language, the string is encoded using the empty - string for language. - """ - s = url_quote(s, safe='', encoding=charset or 'ascii') - if charset is None and language is None: - return s - if language is None: - language = '' - return "%s'%s'%s" % (charset, language, s) - - -rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$', - re.ASCII) - -def decode_params(params): - """Decode parameters list according to RFC 2231. - - params is a sequence of 2-tuples containing (param name, string value). - """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] - # Map parameter's name to a list of continuations. 
The values are a - # 3-tuple of the continuation number, the string value, and a flag - # specifying whether a particular segment is %-encoded. - rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False - value = unquote(value) - mo = rfc2231_continuation.match(name) - if mo: - name, num = mo.group('name', 'num') - if num is not None: - num = int(num) - rfc2231_params.setdefault(name, []).append((num, value, encoded)) - else: - new_params.append((name, '"%s"' % quote(value))) - if rfc2231_params: - for name, continuations in rfc2231_params.items(): - value = [] - extended = False - # Sort by number - continuations.sort() - # And now append all values in numerical order, converting - # %-encodings for the encoded segments. If any of the - # continuation names ends in a *, then the entire string, after - # decoding segments and concatenating, must have the charset and - # language specifiers at the beginning of the string. - for num, s, encoded in continuations: - if encoded: - # Decode as "latin-1", so the characters in s directly - # represent the percent-encoded octet values. - # collapse_rfc2231_value treats this as an octet sequence. - s = url_unquote(s, encoding="latin-1") - extended = True - value.append(s) - value = quote(EMPTYSTRING.join(value)) - if extended: - charset, language, value = decode_rfc2231(value) - new_params.append((name, (charset, language, '"%s"' % value))) - else: - new_params.append((name, '"%s"' % value)) - return new_params - -def collapse_rfc2231_value(value, errors='replace', - fallback_charset='us-ascii'): - if not isinstance(value, tuple) or len(value) != 3: - return unquote(value) - # While value comes to us as a unicode string, we need it to be a bytes - # object. We do not want bytes() normal utf-8 decoder, we want a straight - # interpretation of the string as character bytes. 
- charset, language, text = value - rawbytes = bytes(text, 'raw-unicode-escape') - try: - return str(rawbytes, charset, errors) - except LookupError: - # charset is not a known codec. - return unquote(text) - - -# -# datetime doesn't provide a localtime function yet, so provide one. Code -# adapted from the patch in issue 9527. This may not be perfect, but it is -# better than not having it. -# - -def localtime(dt=None, isdst=-1): - """Return local time as an aware datetime object. - - If called without arguments, return current time. Otherwise *dt* - argument should be a datetime instance, and it is converted to the - local time zone according to the system time zone database. If *dt* is - naive (that is, dt.tzinfo is None), it is assumed to be in local time. - In this case, a positive or zero value for *isdst* causes localtime to - presume initially that summer time (for example, Daylight Saving Time) - is or is not (respectively) in effect for the specified time. A - negative value for *isdst* causes the localtime() function to attempt - to divine whether summer time is in effect for the specified time. - - """ - if dt is None: - return datetime.datetime.now(datetime.timezone.utc).astimezone() - if dt.tzinfo is not None: - return dt.astimezone() - # We have a naive datetime. Convert to a (localtime) timetuple and pass to - # system mktime together with the isdst hint. System mktime will return - # seconds since epoch. - tm = dt.timetuple()[:-1] + (isdst,) - seconds = time.mktime(tm) - localtm = time.localtime(seconds) - try: - delta = datetime.timedelta(seconds=localtm.tm_gmtoff) - tz = datetime.timezone(delta, localtm.tm_zone) - except AttributeError: - # Compute UTC offset and compare with the value implied by tm_isdst. - # If the values match, use the zone name implied by tm_isdst. 
- delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) - dst = time.daylight and localtm.tm_isdst > 0 - gmtoff = -(time.altzone if dst else time.timezone) - if delta == datetime.timedelta(seconds=gmtoff): - tz = datetime.timezone(delta, time.tzname[dst]) - else: - tz = datetime.timezone(delta) - return dt.replace(tzinfo=tz) |
