diff options
| author | yum <yum.food.vr@gmail.com> | 2023-01-23 14:28:53 -0800 |
|---|---|---|
| committer | yum <yum.food.vr@gmail.com> | 2023-01-23 14:32:09 -0800 |
| commit | 9fff496394dcd94c4084694ca96a5e07ab836274 (patch) | |
| tree | d89b78e16ecb6011bdd74555da79f7a8c1d90752 /FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http | |
| parent | 9329d64f991b8b3289af22e4c2eedb09a97c5640 (diff) | |
package.ps1 now fetches all dependencies
Don't literally check in Python since it looks dodgy (rightfully so).
Instead the build script just fetches it.
* Update README, simplifying language and documenting other projects
Diffstat (limited to 'FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http')
5 files changed, 0 insertions, 5280 deletions
diff --git a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/__init__.py b/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/__init__.py +++ /dev/null diff --git a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/client.py b/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/client.py deleted file mode 100644 index e663d12..0000000 --- a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/client.py +++ /dev/null @@ -1,1346 +0,0 @@ -"""HTTP/1.1 client library - -A backport of the Python 3.3 http/client.py module for python-future. - -<intro stuff goes here> -<other stuff, too> - -HTTPConnection goes through a number of "states", which define when a client -may legally make another request or fetch the response for a particular -request. This diagram details these state transitions: - - (null) - | - | HTTPConnection() - v - Idle - | - | putrequest() - v - Request-started - | - | ( putheader() )* endheaders() - v - Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] - |\____________________ - | | - | response.read() | putrequest() - v v - Idle Req-started-unread-response - ______/| - / | - response.read() | | ( putheader() )* endheaders() - v v - Request-started Req-sent-unread-response - | - | response.read() - v - Request-sent - -This diagram presents the following rules: - -- a second request may not be started until {response-headers-read} - -- a response [object] cannot be retrieved until {request-sent} - -- there is no differentiation between an unread response body and a - partially read response body - -Note: this enforcement is applied by the HTTPConnection class. The - HTTPResponse class does not enforce this state machine, which - implies sophisticated clients may accelerate the request/response - pipeline. Caution should be taken, though: accelerating the states - beyond the above pattern may imply knowledge of the server's - connection-close behavior for certain requests. For example, it - is impossible to tell whether the server will close the connection - UNTIL the response headers have been read; this means that further - requests cannot be placed into the pipeline until it is known that - the server will NOT be closing the connection. - -Logical State __state __response -------------- ------- ---------- -Idle _CS_IDLE None -Request-started _CS_REQ_STARTED None -Request-sent _CS_REQ_SENT None -Unread-response _CS_IDLE <response_class> -Req-started-unread-response _CS_REQ_STARTED <response_class> -Req-sent-unread-response _CS_REQ_SENT <response_class> -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future.builtins import bytes, int, str, super -from future.utils import PY2 - -from future.backports.email import parser as email_parser -from future.backports.email import message as email_message -from future.backports.misc import create_connection as socket_create_connection -import io -import os -import socket -from future.backports.urllib.parse import urlsplit -import warnings -from array import array - -if PY2: - from collections import Iterable -else: - from collections.abc import Iterable - -__all__ = ["HTTPResponse", "HTTPConnection", - "HTTPException", "NotConnected", "UnknownProtocol", - "UnknownTransferEncoding", "UnimplementedFileMode", - "IncompleteRead", "InvalidURL", "ImproperConnectionState", - "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] - -HTTP_PORT = 80 -HTTPS_PORT = 443 - -_UNKNOWN = 'UNKNOWN' - -# connection states -_CS_IDLE = 'Idle' -_CS_REQ_STARTED = 'Request-started' -_CS_REQ_SENT = 'Request-sent' - -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED = 426 -PRECONDITION_REQUIRED = 428 -TOO_MANY_REQUESTS = 429 -REQUEST_HEADER_FIELDS_TOO_LARGE = 431 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 -NETWORK_AUTHENTICATION_REQUIRED = 511 - -# Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - 428: 'Precondition Required', - 429: 'Too Many Requests', - 431: 'Request Header Fields Too Large', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', - 511: 'Network Authentication Required', -} - -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - -# maximal line length when calling readline(). -_MAXLINE = 65536 -_MAXHEADERS = 100 - - -class HTTPMessage(email_message.Message): - # XXX The only usage of this method is in - # http.server.CGIHTTPRequestHandler. Maybe move the code there so - # that it doesn't need to be part of the public API. The API has - # never been defined so this could cause backwards compatibility - # issues. - - def getallmatchingheaders(self, name): - """Find all header lines matching a given header name. - - Look through the list of headers and find all lines matching a given - header name (and their continuation lines). A list of the lines is - returned, without interpretation. If the header does not occur, an - empty list is returned. If the header occurs multiple times, all - occurrences are returned. Case is not important in the header name. - - """ - name = name.lower() + ':' - n = len(name) - lst = [] - hit = 0 - for line in self.keys(): - if line[:n].lower() == name: - hit = 1 - elif not line[:1].isspace(): - hit = 0 - if hit: - lst.append(line) - return lst - -def parse_headers(fp, _class=HTTPMessage): - """Parses only RFC2822 headers from a file pointer. - - email Parser wants to see strings rather than bytes. - But a TextIOWrapper around self.rfile would buffer too many bytes - from the stream, bytes which we later need to read as bytes. - So we read the correct bytes here, as bytes, for email Parser - to parse. - - """ - headers = [] - while True: - line = fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - headers.append(line) - if len(headers) > _MAXHEADERS: - raise HTTPException("got more than %d headers" % _MAXHEADERS) - if line in (b'\r\n', b'\n', b''): - break - hstring = bytes(b'').join(headers).decode('iso-8859-1') - return email_parser.Parser(_class=_class).parsestr(hstring) - - -_strict_sentinel = object() - -class HTTPResponse(io.RawIOBase): - - # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - - # The bytes from the socket object are iso-8859-1 strings. - # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded - # text following RFC 2047. The basic status line parsing only - # accepts iso-8859-1. - - def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): - # If the response includes a content-length header, we need to - # make sure that the client doesn't read more than the - # specified number of bytes. If it does, it will block until - # the server times out and closes the connection. This will - # happen if a self.fp.read() is done (without a size) whether - # self.fp is buffered or not. So, no self.fp.read() by - # clients unless they know what they are doing. - self.fp = sock.makefile("rb") - self.debuglevel = debuglevel - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) - self._method = method - - # The HTTPResponse object is returned via urllib. The clients - # of http and urllib expect different attributes for the - # headers. headers is used here and supports urllib. msg is - # provided as a backwards compatibility layer for http - # clients. - - self.headers = self.msg = None - - # from the Status-Line of the response - self.version = _UNKNOWN # HTTP-Version - self.status = _UNKNOWN # Status-Code - self.reason = _UNKNOWN # Reason-Phrase - - self.chunked = _UNKNOWN # is "chunked" being used? - self.chunk_left = _UNKNOWN # bytes left to read in current chunk - self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response - - def _read_status(self): - line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") - if len(line) > _MAXLINE: - raise LineTooLong("status line") - if self.debuglevel > 0: - print("reply:", repr(line)) - if not line: - # Presumably, the server closed the connection before - # sending a valid response. - raise BadStatusLine(line) - try: - version, status, reason = line.split(None, 2) - except ValueError: - try: - version, status = line.split(None, 1) - reason = "" - except ValueError: - # empty version will cause next test to fail. - version = "" - if not version.startswith("HTTP/"): - self._close_conn() - raise BadStatusLine(line) - - # The status code is a three-digit number - try: - status = int(status) - if status < 100 or status > 999: - raise BadStatusLine(line) - except ValueError: - raise BadStatusLine(line) - return version, status, reason - - def begin(self): - if self.headers is not None: - # we've already started reading the response - return - - # read until we get a non-100 response - while True: - version, status, reason = self._read_status() - if status != CONTINUE: - break - # skip the header from the 100 response - while True: - skip = self.fp.readline(_MAXLINE + 1) - if len(skip) > _MAXLINE: - raise LineTooLong("header line") - skip = skip.strip() - if not skip: - break - if self.debuglevel > 0: - print("header:", skip) - - self.code = self.status = status - self.reason = reason.strip() - if version in ("HTTP/1.0", "HTTP/0.9"): - # Some servers might still return "0.9", treat it as 1.0 anyway - self.version = 10 - elif version.startswith("HTTP/1."): - self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - else: - raise UnknownProtocol(version) - - self.headers = self.msg = parse_headers(self.fp) - - if self.debuglevel > 0: - for hdr in self.headers: - print("header:", hdr, end=" ") - - # are we using the chunked-style of transfer encoding? - tr_enc = self.headers.get("transfer-encoding") - if tr_enc and tr_enc.lower() == "chunked": - self.chunked = True - self.chunk_left = None - else: - self.chunked = False - - # will the connection close at the end of the response? - self.will_close = self._check_close() - - # do we have a Content-Length? - # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" - self.length = None - length = self.headers.get("content-length") - - # are we using the chunked-style of transfer encoding? - tr_enc = self.headers.get("transfer-encoding") - if length and not self.chunked: - try: - self.length = int(length) - except ValueError: - self.length = None - else: - if self.length < 0: # ignore nonsensical negative lengths - self.length = None - else: - self.length = None - - # does the body have a fixed length? (of zero) - if (status == NO_CONTENT or status == NOT_MODIFIED or - 100 <= status < 200 or # 1xx codes - self._method == "HEAD"): - self.length = 0 - - # if the connection remains open, and we aren't using chunked, and - # a content-length was not provided, then assume that the connection - # WILL close. - if (not self.will_close and - not self.chunked and - self.length is None): - self.will_close = True - - def _check_close(self): - conn = self.headers.get("connection") - if self.version == 11: - # An HTTP/1.1 proxy is assumed to stay open unless - # explicitly closed. - conn = self.headers.get("connection") - if conn and "close" in conn.lower(): - return True - return False - - # Some HTTP/1.0 implementations have support for persistent - # connections, using rules different than HTTP/1.1. - - # For older HTTP, Keep-Alive indicates persistent connection. - if self.headers.get("keep-alive"): - return False - - # At least Akamai returns a "Connection: Keep-Alive" header, - # which was supposed to be sent by the client. - if conn and "keep-alive" in conn.lower(): - return False - - # Proxy-Connection is a netscape hack. - pconn = self.headers.get("proxy-connection") - if pconn and "keep-alive" in pconn.lower(): - return False - - # otherwise, assume it will close - return True - - def _close_conn(self): - fp = self.fp - self.fp = None - fp.close() - - def close(self): - super().close() # set "closed" flag - if self.fp: - self._close_conn() - - # These implementations are for the benefit of io.BufferedReader. - - # XXX This class should probably be revised to act more like - # the "raw stream" that BufferedReader expects. - - def flush(self): - super().flush() - if self.fp: - self.fp.flush() - - def readable(self): - return True - - # End of "raw stream" methods - - def isclosed(self): - """True if the connection is closed.""" - # NOTE: it is possible that we will not ever call self.close(). This - # case occurs when will_close is TRUE, length is None, and we - # read up to the last byte, but NOT past it. - # - # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be - # called, meaning self.isclosed() is meaningful. - return self.fp is None - - def read(self, amt=None): - if self.fp is None: - return bytes(b"") - - if self._method == "HEAD": - self._close_conn() - return bytes(b"") - - if amt is not None: - # Amount is given, so call base class version - # (which is implemented in terms of self.readinto) - return bytes(super(HTTPResponse, self).read(amt)) - else: - # Amount is not given (unbounded read) so we must check self.length - # and self.chunked - - if self.chunked: - return self._readall_chunked() - - if self.length is None: - s = self.fp.read() - else: - try: - s = self._safe_read(self.length) - except IncompleteRead: - self._close_conn() - raise - self.length = 0 - self._close_conn() # we read everything - return bytes(s) - - def readinto(self, b): - if self.fp is None: - return 0 - - if self._method == "HEAD": - self._close_conn() - return 0 - - if self.chunked: - return self._readinto_chunked(b) - - if self.length is not None: - if len(b) > self.length: - # clip the read to the "end of response" - b = memoryview(b)[0:self.length] - - # we do not use _safe_read() here because this may be a .will_close - # connection, and the user is reading more bytes than will be provided - # (for example, reading in 1k chunks) - - if PY2: - data = self.fp.read(len(b)) - n = len(data) - b[:n] = data - else: - n = self.fp.readinto(b) - - if not n and b: - # Ideally, we would raise IncompleteRead if the content-length - # wasn't satisfied, but it might break compatibility. - self._close_conn() - elif self.length is not None: - self.length -= n - if not self.length: - self._close_conn() - return n - - def _read_next_chunk_size(self): - # Read the next chunk size from the file - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("chunk size") - i = line.find(b";") - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - return int(line, 16) - except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self._close_conn() - raise - - def _read_and_discard_trailer(self): - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("trailer line") - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line in (b'\r\n', b'\n', b''): - break - - def _readall_chunked(self): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - value = [] - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b'').join(value)) - value.append(self._safe_read(chunk_left)) - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return bytes(b'').join(value) - - def _readinto_chunked(self, b): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - - total_bytes = 0 - mvb = memoryview(b) - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b[0:total_bytes])) - - if len(mvb) < chunk_left: - n = self._safe_readinto(mvb) - self.chunk_left = chunk_left - n - return total_bytes + n - elif len(mvb) == chunk_left: - n = self._safe_readinto(mvb) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return total_bytes + n - else: - temp_mvb = mvb[0:chunk_left] - n = self._safe_readinto(temp_mvb) - mvb = mvb[n:] - total_bytes += n - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return total_bytes - - def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. IncompleteRead() will be raised in this - situation. - - This function should be used when <amt> bytes "should" be present for - reading. If the bytes are truly not available (due to EOF), then the - IncompleteRead exception can be used to detect the problem. - """ - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(bytes(b'').join(s), amt) - s.append(chunk) - amt -= len(chunk) - return bytes(b"").join(s) - - def _safe_readinto(self, b): - """Same as _safe_read, but for reading into a buffer.""" - total_bytes = 0 - mvb = memoryview(b) - while total_bytes < len(b): - if MAXAMOUNT < len(mvb): - temp_mvb = mvb[0:MAXAMOUNT] - if PY2: - data = self.fp.read(len(temp_mvb)) - n = len(data) - temp_mvb[:n] = data - else: - n = self.fp.readinto(temp_mvb) - else: - if PY2: - data = self.fp.read(len(mvb)) - n = len(data) - mvb[:n] = data - else: - n = self.fp.readinto(mvb) - if not n: - raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) - mvb = mvb[n:] - total_bytes += n - return total_bytes - - def fileno(self): - return self.fp.fileno() - - def getheader(self, name, default=None): - if self.headers is None: - raise ResponseNotReady() - headers = self.headers.get_all(name) or default - if isinstance(headers, str) or not hasattr(headers, '__iter__'): - return headers - else: - return ', '.join(headers) - - def getheaders(self): - """Return list of (header, value) tuples.""" - if self.headers is None: - raise ResponseNotReady() - return list(self.headers.items()) - - # We override IOBase.__iter__ so that it doesn't check for closed-ness - - def __iter__(self): - return self - - # For compatibility with old-style urllib responses. - - def info(self): - return self.headers - - def geturl(self): - return self.url - - def getcode(self): - return self.status - -class HTTPConnection(object): - - _http_vsn = 11 - _http_vsn_str = 'HTTP/1.1' - - response_class = HTTPResponse - default_port = HTTP_PORT - auto_open = 1 - debuglevel = 0 - - def __init__(self, host, port=None, strict=_strict_sentinel, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) - self.timeout = timeout - self.source_address = source_address - self.sock = None - self._buffer = [] - self.__response = None - self.__state = _CS_IDLE - self._method = None - self._tunnel_host = None - self._tunnel_port = None - self._tunnel_headers = {} - - self._set_hostport(host, port) - - def set_tunnel(self, host, port=None, headers=None): - """ Sets up the host and the port for the HTTP CONNECT Tunnelling. - - The headers argument should be a mapping of extra HTTP headers - to send with the CONNECT request. - """ - self._tunnel_host = host - self._tunnel_port = port - if headers: - self._tunnel_headers = headers - else: - self._tunnel_headers.clear() - - def _set_hostport(self, host, port): - if port is None: - i = host.rfind(':') - j = host.rfind(']') # ipv6 addresses have [...] - if i > j: - try: - port = int(host[i+1:]) - except ValueError: - if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ - port = self.default_port - else: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) - host = host[:i] - else: - port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] - self.host = host - self.port = port - - def set_debuglevel(self, level): - self.debuglevel = level - - def _tunnel(self): - self._set_hostport(self._tunnel_host, self._tunnel_port) - connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) - connect_bytes = connect_str.encode("ascii") - self.send(connect_bytes) - for header, value in self._tunnel_headers.items(): - header_str = "%s: %s\r\n" % (header, value) - header_bytes = header_str.encode("latin-1") - self.send(header_bytes) - self.send(bytes(b'\r\n')) - - response = self.response_class(self.sock, method=self._method) - (version, code, message) = response._read_status() - - if code != 200: - self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - # for sites which EOF without sending a trailer - break - if line in (b'\r\n', b'\n', b''): - break - - def connect(self): - """Connect to the host and port specified in __init__.""" - self.sock = socket_create_connection((self.host,self.port), - self.timeout, self.source_address) - if self._tunnel_host: - self._tunnel() - - def close(self): - """Close the connection to the HTTP server.""" - if self.sock: - self.sock.close() # close it manually... there may be other refs - self.sock = None - if self.__response: - self.__response.close() - self.__response = None - self.__state = _CS_IDLE - - def send(self, data): - """Send `data' to the server. - ``data`` can be a string object, a bytes object, an array object, a - file-like object that supports a .read() method, or an iterable object. - """ - - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected() - - if self.debuglevel > 0: - print("send:", repr(data)) - blocksize = 8192 - # Python 2.7 array objects have a read method which is incompatible - # with the 2-arg calling syntax below. - if hasattr(data, "read") and not isinstance(data, array): - if self.debuglevel > 0: - print("sendIng a read()able") - encode = False - try: - mode = data.mode - except AttributeError: - # io.BytesIO and other file-like objects don't have a `mode` - # attribute. - pass - else: - if "b" not in mode: - encode = True - if self.debuglevel > 0: - print("encoding file using iso-8859-1") - while 1: - datablock = data.read(blocksize) - if not datablock: - break - if encode: - datablock = datablock.encode("iso-8859-1") - self.sock.sendall(datablock) - return - try: - self.sock.sendall(data) - except TypeError: - if isinstance(data, Iterable): - for d in data: - self.sock.sendall(d) - else: - raise TypeError("data should be a bytes-like object " - "or an iterable, got %r" % type(data)) - - def _output(self, s): - """Add a line of output to the current request buffer. - - Assumes that the line does *not* end with \\r\\n. - """ - self._buffer.append(s) - - def _send_output(self, message_body=None): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - """ - self._buffer.extend((bytes(b""), bytes(b""))) - msg = bytes(b"\r\n").join(self._buffer) - del self._buffer[:] - # If msg and message_body are sent in a single send() call, - # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. - if isinstance(message_body, bytes): - msg += message_body - message_body = None - self.send(msg) - if message_body is not None: - # message_body was not a string (i.e. it is a file), and - # we must run the risk of Nagle. - self.send(message_body) - - def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): - """Send a request to the server. - - `method' specifies an HTTP request method, e.g. 'GET'. - `url' specifies the object being requested, e.g. '/index.html'. - `skip_host' if True does not add automatically a 'Host:' header - `skip_accept_encoding' if True does not add automatically an - 'Accept-Encoding:' header - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # in certain cases, we cannot issue another request on this connection. - # this occurs when: - # 1) we are in the process of sending a request. (_CS_REQ_STARTED) - # 2) a response to a previous request has signalled that it is going - # to close the connection upon completion. - # 3) the headers for the previous response have not been read, thus - # we cannot determine whether point (2) is true. (_CS_REQ_SENT) - # - # if there is no prior response, then we can request at will. - # - # if point (2) is true, then we will have passed the socket to the - # response (effectively meaning, "there is no prior response"), and - # will open a new one when a new request is made. - # - # Note: if a prior response exists, then we *can* start a new request. - # We are not allowed to begin fetching the response to this new - # request, however, until that prior response is complete. - # - if self.__state == _CS_IDLE: - self.__state = _CS_REQ_STARTED - else: - raise CannotSendRequest(self.__state) - - # Save the method we use, we need it later in the response phase - self._method = method - if not url: - url = '/' - request = '%s %s %s' % (method, url, self._http_vsn_str) - - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) - - if self._http_vsn == 11: - # Issue some standard headers for better HTTP/1.1 compliance - - if not skip_host: - # this header is issued *only* for HTTP/1.1 - # connections. more specifically, this means it is - # only issued when the client uses the new - # HTTPConnection() class. backwards-compat clients - # will be using HTTP/1.0 and those clients may be - # issuing this header themselves. we should NOT issue - # it twice; some web servers (such as Apache) barf - # when they see two Host: headers - - # If we need a non-standard port,include it in the - # header. If the request is going through a proxy, - # but the host of the actual URL, not the host of the - # proxy. - - netloc = '' - if url.startswith('http'): - nil, netloc, nil, nil, nil = urlsplit(url) - - if netloc: - try: - netloc_enc = netloc.encode("ascii") - except UnicodeEncodeError: - netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) - else: - try: - host_enc = self.host.encode("ascii") - except UnicodeEncodeError: - host_enc = self.host.encode("idna") - - # As per RFC 273, IPv6 address should be wrapped with [] - # when used as Host header - - if self.host.find(':') >= 0: - host_enc = bytes(b'[' + host_enc + b']') - - if self.port == self.default_port: - self.putheader('Host', host_enc) - else: - host_enc = host_enc.decode("ascii") - self.putheader('Host', "%s:%s" % (host_enc, self.port)) - - # note: we are assuming that clients will not attempt to set these - # headers since *this* library must deal with the - # consequences. this also means that when the supporting - # libraries are updated to recognize other forms, then this - # code should be changed (removed or updated). - - # we only want a Content-Encoding of "identity" since we don't - # support encodings such as x-gzip or x-deflate. - if not skip_accept_encoding: - self.putheader('Accept-Encoding', 'identity') - - # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" - #self.putheader('TE', 'chunked') - - # if TE is supplied in the header, then it must appear in a - # Connection header. - #self.putheader('Connection', 'TE') - - else: - # For HTTP/1.0, the server will assume "not chunked" - pass - - def putheader(self, header, *values): - """Send a request header line to the server. - - For example: h.putheader('Accept', 'text/html') - """ - if self.__state != _CS_REQ_STARTED: - raise CannotSendHeader() - - if hasattr(header, 'encode'): - header = header.encode('ascii') - values = list(values) - for i, one_value in enumerate(values): - if hasattr(one_value, 'encode'): - values[i] = one_value.encode('latin-1') - elif isinstance(one_value, int): - values[i] = str(one_value).encode('ascii') - value = bytes(b'\r\n\t').join(values) - header = header + bytes(b': ') + value - self._output(header) - - def endheaders(self, message_body=None): - """Indicate that the last header line has been sent to the server. - - This method sends the request to the server. The optional message_body - argument can be used to pass a message body associated with the - request. The message body will be sent in the same packet as the - message headers if it is a string, otherwise it is sent as a separate - packet. - """ - if self.__state == _CS_REQ_STARTED: - self.__state = _CS_REQ_SENT - else: - raise CannotSendHeader() - self._send_output(message_body) - - def request(self, method, url, body=None, headers={}): - """Send a complete request to the server.""" - self._send_request(method, url, body, headers) - - def _set_content_length(self, body): - # Set the content-length based on the body. - thelen = None - try: - thelen = str(len(body)) - except TypeError as te: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") - - if thelen is not None: - self.putheader('Content-Length', thelen) - - def _send_request(self, method, url, body, headers): - # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) - skips = {} - if 'host' in header_names: - skips['skip_host'] = 1 - if 'accept-encoding' in header_names: - skips['skip_accept_encoding'] = 1 - - self.putrequest(method, url, **skips) - - if body is not None and ('content-length' not in header_names): - self._set_content_length(body) - for hdr, value in headers.items(): - self.putheader(hdr, value) - if isinstance(body, str): - # RFC 2616 Section 3.7.1 says that text default has a - # default charset of iso-8859-1. - body = body.encode('iso-8859-1') - self.endheaders(body) - - def getresponse(self): - """Get the response from the server. - - If the HTTPConnection is in the correct state, returns an - instance of HTTPResponse or of whatever object is returned by - class the response_class variable. - - If a request has not been sent or if a previous response has - not be handled, ResponseNotReady is raised. If the HTTP - response indicates that the connection should be closed, then - it will be closed before the response is returned. When the - connection is closed, the underlying socket is closed. - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - # if a prior response exists, then it must be completed (otherwise, we - # cannot read this response's header to determine the connection-close - # behavior) - # - # note: if a prior response existed, but was connection-close, then the - # socket and response were made independent of this HTTPConnection - # object since a new request requires that we open a whole new - # connection - # - # this means the prior response had one of two states: - # 1) will_close: this connection was reset and the prior socket and - # response operate independently - # 2) persistent: the response was retained and we await its - # isclosed() status to become true. - # - if self.__state != _CS_REQ_SENT or self.__response: - raise ResponseNotReady(self.__state) - - if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel, - method=self._method) - else: - response = self.response_class(self.sock, method=self._method) - - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE - - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response - - return response - -try: - import ssl - from ssl import SSLContext -except ImportError: - pass -else: - class HTTPSConnection(HTTPConnection): - "This class allows communication via SSL." - - default_port = HTTPS_PORT - - # XXX Should key_file and cert_file be deprecated in favour of context? - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, **_3to2kwargs): - if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname'] - else: check_hostname = None - if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context'] - else: context = None - super(HTTPSConnection, self).__init__(host, port, strict, timeout, - source_address) - self.key_file = key_file - self.cert_file = cert_file - if context is None: - # Some reasonable defaults - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - context.options |= ssl.OP_NO_SSLv2 - will_verify = context.verify_mode != ssl.CERT_NONE - if check_hostname is None: - check_hostname = will_verify - elif check_hostname and not will_verify: - raise ValueError("check_hostname needs a SSL context with " - "either CERT_OPTIONAL or CERT_REQUIRED") - if key_file or cert_file: - context.load_cert_chain(cert_file, key_file) - self._context = context - self._check_hostname = check_hostname - - def connect(self): - "Connect to a host on a given (SSL) port." - - sock = socket_create_connection((self.host, self.port), - self.timeout, self.source_address) - - if self._tunnel_host: - self.sock = sock - self._tunnel() - - server_hostname = self.host if ssl.HAS_SNI else None - self.sock = self._context.wrap_socket(sock, - server_hostname=server_hostname) - try: - if self._check_hostname: - ssl.match_hostname(self.sock.getpeercert(), self.host) - except Exception: - self.sock.shutdown(socket.SHUT_RDWR) - self.sock.close() - raise - - __all__.append("HTTPSConnection") - - - # ###################################### - # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext - # # doesn't exist in the Py2.7 stdlib - # class HTTPSConnection(HTTPConnection): - # "This class allows communication via SSL." - - # default_port = HTTPS_PORT - - # def __init__(self, host, port=None, key_file=None, cert_file=None, - # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - # source_address=None): - # HTTPConnection.__init__(self, host, port, strict, timeout, - # source_address) - # self.key_file = key_file - # self.cert_file = cert_file - - # def connect(self): - # "Connect to a host on a given (SSL) port." - - # sock = socket_create_connection((self.host, self.port), - # self.timeout, self.source_address) - # if self._tunnel_host: - # self.sock = sock - # self._tunnel() - # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) - - # __all__.append("HTTPSConnection") - # ###################################### - - -class HTTPException(Exception): - # Subclasses that define an __init__ must call Exception.__init__ - # or define self.args. Otherwise, str() will fail. - pass - -class NotConnected(HTTPException): - pass - -class InvalidURL(HTTPException): - pass - -class UnknownProtocol(HTTPException): - def __init__(self, version): - self.args = version, - self.version = version - -class UnknownTransferEncoding(HTTPException): - pass - -class UnimplementedFileMode(HTTPException): - pass - -class IncompleteRead(HTTPException): - def __init__(self, partial, expected=None): - self.args = partial, - self.partial = partial - self.expected = expected - def __repr__(self): - if self.expected is not None: - e = ', %i more expected' % self.expected - else: - e = '' - return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) - def __str__(self): - return repr(self) - -class ImproperConnectionState(HTTPException): - pass - -class CannotSendRequest(ImproperConnectionState): - pass - -class CannotSendHeader(ImproperConnectionState): - pass - -class ResponseNotReady(ImproperConnectionState): - pass - -class BadStatusLine(HTTPException): - def __init__(self, line): - if not line: - line = repr(line) - self.args = line, - self.line = line - -class LineTooLong(HTTPException): - def __init__(self, line_type): - HTTPException.__init__(self, "got more than %d bytes when reading %s" - % (_MAXLINE, line_type)) - -# for backwards compatibility -error = HTTPException diff --git a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookiejar.py b/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookiejar.py deleted file mode 100644 index af3ef41..0000000 --- a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookiejar.py +++ /dev/null @@ -1,2110 +0,0 @@ -r"""HTTP cookie handling for web clients. - -This is a backport of the Py3.3 ``http.cookiejar`` module for -python-future. - -This module has (now fairly distant) origins in Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - -Class diagram (note that BSDDBCookieJar and the MSIE* classes are not -distributed with the Python standard library, but are available from -http://wwwsearch.sf.net/): - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -""" - -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import filter, int, map, open, str -from future.utils import as_native_str, PY2 - -__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', - 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] - -import copy -import datetime -import re -if PY2: - re.ASCII = 0 -import time -from future.backports.urllib.parse import urlparse, urlsplit, quote -from future.backports.http.client import HTTP_PORT -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -from calendar import timegm - -debug = False # set to True to enable debugging via the logging module -logger = None - -def _debug(*args): - if not debug: - return - global logger - if not logger: - import logging - logger = logging.getLogger("http.cookiejar") - return logger.debug(*args) - - -DEFAULT_HTTP_PORT = str(HTTP_PORT) -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") - -def _warn_unhandled_exception(): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. Warn if any - # exceptions are caught there. - import io, warnings, traceback - f = io.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) - - -# Date/time conversion -# ----------------------------------------------------------------------------- - -EPOCH_YEAR = 1970 -def _timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -MONTHS_LOWER = [] -for month in MONTHS: MONTHS_LOWER.append(month.lower()) - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: - dt = datetime.datetime.utcnow() - else: - dt = datetime.datetime.utcfromtimestamp(t) - return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( - DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], - dt.year, dt.hour, dt.minute, dt.second) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) -def offset_from_tz_string(tz): - offset = None - if tz in UTC_ZONES: - offset = 0 - else: - m = TIMEZONE_RE.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - # translate month name to number - # month numbers start with 1 (January) - try: - mon = MONTHS_LOWER.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - yr = int(yr) - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to seconds since epoch (not timezone-adjusted) - t = _timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - -STRICT_DATE_RE = re.compile( - r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) -WEEKDAY_RE = re.compile( - r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) -LOOSE_HTTP_DATE_RE = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X | re.ASCII) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = STRICT_DATE_RE.search(text) - if m: - g = m.groups() - mon = MONTHS_LOWER.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return _timegm(tt) - - # No, we need some messy parsing... - - # clean up - text = text.lstrip() - text = WEEKDAY_RE.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = LOOSE_HTTP_DATE_RE.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - -ISO_DATE_RE = re.compile( - """^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X | re. ASCII) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = ISO_DATE_RE.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? - yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -# Header parsing -# ----------------------------------------------------------------------------- - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") -HEADER_ESCAPE_RE = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1*<any CHAR except CTLs or separators> - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = <any TEXT except <">> - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. - - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert not isinstance(header_values, str) - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = HEADER_TOKEN_RE.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = HEADER_QUOTED_VALUE_RE.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = HEADER_ESCAPE_RE.sub(r"\1", value) - else: - m = HEADER_VALUE_RE.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse (almost) of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. - - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def strip_quotes(text): - if text.startswith('"'): - text = text[1:] - if text.endswith('"'): - text = text[:-1] - return text - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "version", "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - for ii, param in enumerate(re.split(r";\s*", ns_header)): - param = param.rstrip() - if param == "": continue - if "=" not in param: - k, v = param, None - else: - k, v = re.split(r"\s*=\s*", param, 1) - k = k.lstrip() - if ii != 0: - lc = k.lower() - if lc in known_attrs: - k = lc - if k == "version": - # This is an RFC 2109 cookie. - v = strip_quotes(v) - version_set = True - if k == "expires": - # convert expires date to seconds since epoch - v = http2time(strip_quotes(v)) # None if invalid - pairs.append((k, v)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -IPV4_RE = re.compile(r"\.\d+$", re.ASCII) -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. - A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - if IPV4_RE.search(text): - return False - return True - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$", re.ASCII) -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = cut_port_re.sub("", host, 1) - return host.lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name). - - As defined by RFC 2965, except both are lowercased. - - """ - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def request_path(request): - """Path component of request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - parts = urlsplit(url) - path = escape_path(parts.path) - if not path.startswith("/"): - # fix bad RFC 2396 absoluteURI - path = "/" + path - return path - -def request_port(request): - host = request.host - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - _debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - path = quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host(request) - if not domain_match(req_host, reach(request.get_origin_req_host())): - return True - else: - return False - - -class Cookie(object): - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return name in self._rest - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - - def is_expired(self, now=None): - if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "<Cookie %s for %s>" % (namevalue, limit) - - @as_native_str() - def __repr__(self): - args = [] - for name in ("version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ): - attr = getattr(self, name) - ### Python-Future: - # Avoid u'...' prefixes for unicode strings: - if isinstance(attr, str): - attr = str(attr) - ### - args.append(str("%s=%s") % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy(object): - """Defines which cookies get accepted from and returned to server. - - May also modify cookies, though this is probably a bad idea. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server.""" - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies.""" - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """Constructor arguments should be passed as keyword arguments only.""" - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override .set_ok(), be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. - _debug(" Set-Cookie2 without version attribute (%s=%s)", - cookie.name, cookie.value) - return False - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - _debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - _debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - _debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - _debug(" domain %s is not in user allow-list", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host(request) - domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - # XXX This should probably be compared with the Konqueror - # (kcookiejar.cpp) and Mozilla implementations, but it's a - # losing battle. - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if sld.lower() in ("co", "ac", "com", "edu", "org", "net", - "gov", "mil", "int", "aero", "biz", "cat", "coop", - "info", "jobs", "mobi", "museum", "name", "pro", - "travel", "eu") and len(tld) == 2: - # domain like .co.uk - _debug(" country-code second level domain %s", domain) - return False - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - _debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - _debug(" effective request-host %s (even with added " - "initial dot) does not end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - _debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - _debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - _debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - _debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def return_ok(self, cookie, request): - """ - If you override .return_ok(), be sure to call this method. If it - returns false, so should your subclass (assuming your subclass wants to - be more strict about which cookies to return). - - """ - # Path has already been checked by .path_return_ok(), and domain - # blocking done by .domain_return_ok(). - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - for n in "version", "verifiability", "secure", "expires", "port", "domain": - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request.unverifiable and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.type != "https": - _debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - _debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - _debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - _debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - _debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - _debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - req_host, erhn = eff_request_host(request) - if not req_host.startswith("."): - req_host = "."+req_host - if not erhn.startswith("."): - erhn = "."+erhn - if not (req_host.endswith(domain) or erhn.endswith(domain)): - #_debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - _debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - _debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - _debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - _debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = sorted(adict.keys()) - return map(adict.get, keys) - -def deepvalues(mapping): - """Iterates over nested mapping, depth-first, in sorted order by key.""" - values = vals_sorted_by_key(mapping) - for obj in values: - mapping = False - try: - obj.items - except AttributeError: - pass - else: - mapping = True - for subobj in deepvalues(obj): - yield subobj - if not mapping: - yield obj - - -# Used as second parameter to dict.get() method, to distinguish absent -# dict key from one with a None value. -class Absent(object): pass - -class CookieJar(object): - """Collection of HTTP cookies. - - You may not need to know about this class: try - urllib.request.build_opener(HTTPCookieProcessor).open(url). - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) - - def __init__(self, policy=None): - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies_lock = _threading.RLock() - self._cookies = {} - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - _debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - _debug(" not returning cookie") - continue - _debug(" it's a match") - cookies.append(cookie) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - like ['foo="bar"; $Path="/"', ...] - - The $Version attribute is also added when appropriate (currently only - once per request). - - """ - # add cookies in order of most specific (ie. longest) path first - cookies.sort(key=lambda a: len(a.path), reverse=True) - - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib.request.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. - - """ - _debug("add_cookie_header") - self._cookies_lock.acquire() - try: - - self._policy._now = self._now = int(time.time()) - - cookies = self._cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header( - "Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if (self._policy.rfc2965 and not self._policy.hide_cookie2 and - not request.has_header("Cookie2")): - for cookie in cookies: - if cookie.version != 1: - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - finally: - self._cookies_lock.release() - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). - max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if k in standard: - # only first value is significant - continue - if k == "domain": - if v is None: - _debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - _debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - try: - v = int(v) - except ValueError: - _debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. - k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ("port", "comment", "commenturl")): - _debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - elif expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(domain, path, name) - except KeyError: - pass - _debug("Expiring cookie, domain='%s', path='%s', name='%s'", - domain, path, name) - return None - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) - if rfc2109_as_ns is None: - rfc2109_as_ns = not self._policy.rfc2965 - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_ns: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object.""" - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.get_all("Set-Cookie2", []) - ns_hdrs = headers.get_all("Set-Cookie", []) - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except Exception: - _warn_unhandled_exception() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except Exception: - _warn_unhandled_exception() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return key not in lookup - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so.""" - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - - finally: - self._cookies_lock.release() - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set.""" - c = self._cookies - self._cookies_lock.acquire() - try: - if cookie.domain not in c: c[cookie.domain] = {} - c2 = c[cookie.domain] - if cookie.path not in c2: c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - finally: - self._cookies_lock.release() - - def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request.""" - _debug("extract_cookies: %s", response.info()) - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - for cookie in self.make_cookies(response, request): - if self._policy.set_ok(cookie, request): - _debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - finally: - self._cookies_lock.release() - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Note that the .save() method won't save session cookies anyway, unless - you ask otherwise by passing a true ignore_discard argument. - - """ - self._cookies_lock.acquire() - try: - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def clear_expired_cookies(self): - """Discard all expired cookies. - - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the - .save() method won't save expired cookies anyway (unless you ask - otherwise by passing a true ignore_expires argument). - - """ - self._cookies_lock.acquire() - try: - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def __iter__(self): - return deepvalues(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - @as_native_str() - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -# derives from IOError for backwards-compatibility with Python 2.4.0 -class LoadError(IOError): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file.""" - - def __init__(self, filename=None, delayload=False, policy=None): - """ - Cookies are NOT loaded from the named file until either the .load() or - .revert() method is called. - - """ - CookieJar.__init__(self, policy) - if filename is not None: - try: - filename+"" - except: - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file.""" - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - self._cookies_lock.acquire() - try: - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise - - finally: - self._cookies_lock.release() - - -def lwp_cookie_str(cookie): - """Return string representation of Cookie in an the LWP cookie file format. - - Actually, the format is extended a bit -- see module docstring. - - """ - h = [(cookie.name, cookie.value), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - if cookie.path_specified: h.append(("path_spec", None)) - if cookie.port_specified: h.append(("port_spec", None)) - if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - - keys = sorted(cookie._rest.keys()) - for k in keys: - h.append((k, str(cookie._rest[k]))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - -class LWPCookieJar(FileCookieJar): - """ - The LWPCookieJar saves a sequence of "Set-Cookie3" lines. - "Set-Cookie3" is the format used by the libwww-perl libary, not known - to be compatible with any browser, but which is easy to read and - doesn't lose information about RFC 2965 cookies. - - Additional methods - - as_lwp_str(ignore_discard=True, ignore_expired=True) - - """ - - def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. - - ignore_discard and ignore_expires: see docstring for FileCookieJar.save - - """ - now = time.time() - r = [] - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) - return "\n".join(r+[""]) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - # There really isn't an LWP Cookies 2.0 format, but this indicates - # that there is extra information in here (domain_dot and - # port_spec) while still being compatible with libwww-perl, I hope. - f.write("#LWP-Cookies-2.0\n") - f.write(self.as_lwp_str(ignore_discard, ignore_expires)) - finally: - f.close() - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - magic = f.readline() - if not self.magic_re.search(magic): - msg = ("%r does not look like a Set-Cookie3 (LWP) format " - "file" % filename) - raise LoadError(msg) - - now = time.time() - - header = "Set-Cookie3:" - boolean_attrs = ("port_spec", "path_spec", "domain_dot", - "secure", "discard") - value_attrs = ("version", - "port", "path", "domain", - "expires", - "comment", "commenturl") - - try: - while 1: - line = f.readline() - if line == "": break - if not line.startswith(header): - continue - line = line[len(header):].strip() - - for data in split_header_words([line]): - name, value = data[0] - standard = {} - rest = {} - for k in boolean_attrs: - standard[k] = False - for k, v in data[1:]: - if k is not None: - lc = k.lower() - else: - lc = None - # don't lose case distinction for unknown fields - if (lc in value_attrs) or (lc in boolean_attrs): - k = lc - if k in boolean_attrs: - if v is None: v = True - standard[k] = v - elif k in value_attrs: - standard[k] = v - else: - rest[k] = v - - h = standard.get - expires = h("expires") - discard = h("discard") - if expires is not None: - expires = iso2time(expires) - if expires is None: - discard = True - domain = h("domain") - domain_specified = domain.startswith(".") - c = Cookie(h("version"), name, value, - h("port"), h("port_spec"), - domain, domain_specified, h("domain_dot"), - h("path"), h("path_spec"), - h("secure"), - expires, - discard, - h("comment"), - h("commenturl"), - rest) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Set-Cookie3 format file %r: %r" % - (filename, line)) - - -class MozillaCookieJar(FileCookieJar): - """ - - WARNING: you may want to backup your browser's cookies file if you use - this class to save cookies. I *think* it works, but there have been - bugs in the past! - - This class differs from CookieJar only in the format it uses to save and - load cookies to and from a file. This class uses the Mozilla/Netscape - `cookies.txt' format. lynx uses this file format, too. - - Don't expect cookies saved while the browser is running to be noticed by - the browser (in fact, Mozilla on unix will overwrite your saved cookies if - you change them on disk while it's running; on Windows, you probably can't - save at all while the browser is running). - - Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to - Netscape cookies on saving. - - In particular, the cookie version and port number information is lost, - together with information about whether or not Path, Port and Discard were - specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the - domain as set in the HTTP header started with a dot (yes, I'm aware some - domains in Netscape files start with a dot and some don't -- trust me, you - really don't want to know any more about this). - - Note that though Mozilla and Netscape use the same format, they use - slightly different headers. The class saves cookies using the Netscape - header by default (Mozilla can cope with that). - - """ - magic_re = re.compile("#( Netscape)? HTTP Cookie File") - header = """\ -# Netscape HTTP Cookie File -# http://www.netscape.com/newsref/std/cookie_spec.html -# This is a generated file! Do not edit. - -""" - - def _really_load(self, f, filename, ignore_discard, ignore_expires): - now = time.time() - - magic = f.readline() - if not self.magic_re.search(magic): - f.close() - raise LoadError( - "%r does not look like a Netscape format cookies file" % - filename) - - try: - while 1: - line = f.readline() - if line == "": break - - # last field may be absent, so keep any trailing tab - if line.endswith("\n"): line = line[:-1] - - # skip comments and blank lines XXX what is $ for? - if (line.strip().startswith(("#", "$")) or - line.strip() == ""): - continue - - domain, domain_specified, path, secure, expires, name, value = \ - line.split("\t") - secure = (secure == "TRUE") - domain_specified = (domain_specified == "TRUE") - if name == "": - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = value - value = None - - initial_dot = domain.startswith(".") - assert domain_specified == initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - # assume path_specified is false - c = Cookie(0, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - {}) - if not ignore_discard and c.discard: - continue - if not ignore_expires and c.is_expired(now): - continue - self.set_cookie(c) - - except IOError: - raise - except Exception: - _warn_unhandled_exception() - raise LoadError("invalid Netscape format cookies file %r: %r" % - (filename, line)) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename, "w") - try: - f.write(self.header) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: secure = "TRUE" - else: secure = "FALSE" - if cookie.domain.startswith("."): initial_dot = "TRUE" - else: initial_dot = "FALSE" - if cookie.expires is not None: - expires = str(cookie.expires) - else: - expires = "" - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = "" - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - "\t".join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value])+ - "\n") - finally: - f.close() diff --git a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookies.py b/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookies.py deleted file mode 100644 index 8bb61e2..0000000 --- a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/cookies.py +++ /dev/null @@ -1,598 +0,0 @@ -#### -# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software -# and its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Timothy O'Malley not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS -# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR -# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. -# -#### -# -# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp -# by Timothy O'Malley <timo@alum.mit.edu> -# -# Cookie.py is a Python module for the handling of HTTP -# cookies as a Python dictionary. See RFC 2109 for more -# information on cookies. -# -# The original idea to treat Cookies as a dictionary came from -# Dave Mitchell (davem@magnet.com) in 1995, when he released the -# first version of nscookie.py. -# -#### - -r""" -http.cookies module ported to python-future from Py3.3 - -Here's a sample session to show how to use this module. -At the moment, this is the only documentation. - -The Basics ----------- - -Importing is easy... - - >>> from http import cookies - -Most of the time you start by creating a cookie. - - >>> C = cookies.SimpleCookie() - -Once you've created your Cookie, you can add values just as if it were -a dictionary. - - >>> C = cookies.SimpleCookie() - >>> C["fig"] = "newton" - >>> C["sugar"] = "wafer" - >>> C.output() - 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' - -Notice that the printable representation of a Cookie is the -appropriate format for a Set-Cookie: header. This is the -default behavior. You can change the header and printed -attributes by using the .output() function - - >>> C = cookies.SimpleCookie() - >>> C["rocky"] = "road" - >>> C["rocky"]["path"] = "/cookie" - >>> print(C.output(header="Cookie:")) - Cookie: rocky=road; Path=/cookie - >>> print(C.output(attrs=[], header="Cookie:")) - Cookie: rocky=road - -The load() method of a Cookie extracts cookies from a string. In a -CGI script, you would use this method to extract the cookies from the -HTTP_COOKIE environment variable. - - >>> C = cookies.SimpleCookie() - >>> C.load("chips=ahoy; vienna=finger") - >>> C.output() - 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' - -The load() method is darn-tootin smart about identifying cookies -within a string. Escaped quotation marks, nested semicolons, and other -such trickeries do not confuse it. - - >>> C = cookies.SimpleCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') - >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" - -Each element of the Cookie also supports all of the RFC 2109 -Cookie attributes. Here's an example which sets the Path -attribute. - - >>> C = cookies.SimpleCookie() - >>> C["oreo"] = "doublestuff" - >>> C["oreo"]["path"] = "/" - >>> print(C) - Set-Cookie: oreo=doublestuff; Path=/ - -Each dictionary element has a 'value' attribute, which gives you -back the value associated with the key. - - >>> C = cookies.SimpleCookie() - >>> C["twix"] = "none for you" - >>> C["twix"].value - 'none for you' - -The SimpleCookie expects that all values should be standard strings. -Just to be sure, SimpleCookie invokes the str() builtin to convert -the value to a string, when the values are set dictionary-style. - - >>> C = cookies.SimpleCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - '7' - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' - -Finis. -""" -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -from future.builtins import chr, dict, int, str -from future.utils import PY2, as_native_str - -# -# Import our required modules -# -import re -if PY2: - re.ASCII = 0 # for py2 compatibility -import string - -__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] - -_nulljoin = ''.join -_semispacejoin = '; '.join -_spacejoin = ' '.join - -# -# Define an exception visible to External modules -# -class CookieError(Exception): - pass - - -# These quoting routines conform to the RFC2109 specification, which in -# turn references the character definitions from RFC2068. They provide -# a two-way quoting algorithm. Any non-text character is translated -# into a 4 character sequence: a forward-slash followed by the -# three-digit octal equivalent of the character. Any '\' or '"' is -# quoted with a preceeding '\' slash. -# -# These are taken from RFC2068 and RFC2109. -# _LegalChars is the list of chars which don't require "'s -# _Translator hash-table for fast quoting -# -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" -_Translator = { - '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', - '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', - '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', - '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', - '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', - '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', - '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', - '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', - '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', - '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', - '\036' : '\\036', '\037' : '\\037', - - # Because of the way browsers really handle cookies (as opposed - # to what the RFC says) we also encode , and ; - - ',' : '\\054', ';' : '\\073', - - '"' : '\\"', '\\' : '\\\\', - - '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', - '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', - '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', - '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', - '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', - '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', - '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', - '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', - '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', - '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', - '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', - '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', - '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', - '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', - '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', - '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', - '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', - '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', - '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', - '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', - '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', - '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', - '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', - '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', - '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', - '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', - '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', - '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', - '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', - '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', - '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', - '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', - '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', - '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', - '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', - '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', - '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', - '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', - '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', - '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', - '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', - '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', - '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' - } - -def _quote(str, LegalChars=_LegalChars): - r"""Quote a string for use in a cookie header. - - If the string does not need to be double-quoted, then just return the - string. Otherwise, surround the string in doublequotes and quote - (with a \) special characters. - """ - if all(c in LegalChars for c in str): - return str - else: - return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' - - -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") - -def _unquote(mystr): - # If there aren't any doublequotes, - # then there can't be any special characters. See RFC 2109. - if len(mystr) < 2: - return mystr - if mystr[0] != '"' or mystr[-1] != '"': - return mystr - - # We have to assume that we must decode this string. - # Down to work. - - # Remove the "s - mystr = mystr[1:-1] - - # Check for special sequences. Examples: - # \012 --> \n - # \" --> " - # - i = 0 - n = len(mystr) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(mystr, i) - q_match = _QuotePatt.search(mystr, i) - if not o_match and not q_match: # Neither matched - res.append(mystr[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(mystr[i:k]) - res.append(mystr[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(mystr[i:j]) - res.append(chr(int(mystr[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) - -# The _getdate() routine is used to set the expiration time in the cookie's HTTP -# header. By default, _getdate() returns the current time in the appropriate -# "expires" format for a Set-Cookie header. The one optional argument is an -# offset from now, in seconds. For example, an offset of -3600 means "one hour -# ago". The offset may be a floating point number. -# - -_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - -_monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - -def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): - from time import gmtime, time - now = time() - year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) - return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ - (weekdayname[wd], day, monthname[month], year, hh, mm, ss) - - -class Morsel(dict): - """A class to hold ONE (key, value) pair. - - In a cookie, each such pair may have several attributes, so this class is - used to keep the attributes associated with the appropriate key,value pair. - This class also includes a coded_value attribute, which is used to hold - the network representation of the value. This is most useful when Python - objects are pickled for network transit. - """ - # RFC 2109 lists these attributes as reserved: - # path comment domain - # max-age secure version - # - # For historical reasons, these attributes are also reserved: - # expires - # - # This is an extension from Microsoft: - # httponly - # - # This dictionary provides a mapping from the lowercase - # variant on the left to the appropriate traditional - # formatting on the right. - _reserved = { - "expires" : "expires", - "path" : "Path", - "comment" : "Comment", - "domain" : "Domain", - "max-age" : "Max-Age", - "secure" : "secure", - "httponly" : "httponly", - "version" : "Version", - } - - _flags = set(['secure', 'httponly']) - - def __init__(self): - # Set defaults - self.key = self.value = self.coded_value = None - - # Set default attributes - for key in self._reserved: - dict.__setitem__(self, key, "") - - def __setitem__(self, K, V): - K = K.lower() - if not K in self._reserved: - raise CookieError("Invalid Attribute %s" % K) - dict.__setitem__(self, K, V) - - def isReservedKey(self, K): - return K.lower() in self._reserved - - def set(self, key, val, coded_val, LegalChars=_LegalChars): - # First we verify that the key isn't a reserved word - # Second we make sure it only contains legal characters - if key.lower() in self._reserved: - raise CookieError("Attempt to set a reserved key: %s" % key) - if any(c not in LegalChars for c in key): - raise CookieError("Illegal key value: %s" % key) - - # It's a good key, so save it. - self.key = key - self.value = val - self.coded_value = coded_val - - def output(self, attrs=None, header="Set-Cookie:"): - return "%s %s" % (header, self.OutputString(attrs)) - - __str__ = output - - @as_native_str() - def __repr__(self): - if PY2 and isinstance(self.value, unicode): - val = str(self.value) # make it a newstr to remove the u prefix - else: - val = self.value - return '<%s: %s=%s>' % (self.__class__.__name__, - str(self.key), repr(val)) - - def js_output(self, attrs=None): - # Print javascript - return """ - <script type="text/javascript"> - <!-- begin hiding - document.cookie = \"%s\"; - // end hiding --> - </script> - """ % (self.OutputString(attrs).replace('"', r'\"')) - - def OutputString(self, attrs=None): - # Build up our result - # - result = [] - append = result.append - - # First, the key=value pair - append("%s=%s" % (self.key, self.coded_value)) - - # Now add any defined attributes - if attrs is None: - attrs = self._reserved - items = sorted(self.items()) - for key, value in items: - if value == "": - continue - if key not in attrs: - continue - if key == "expires" and isinstance(value, int): - append("%s=%s" % (self._reserved[key], _getdate(value))) - elif key == "max-age" and isinstance(value, int): - append("%s=%d" % (self._reserved[key], value)) - elif key == "secure": - append(str(self._reserved[key])) - elif key == "httponly": - append(str(self._reserved[key])) - else: - append("%s=%s" % (self._reserved[key], value)) - - # Return the result - return _semispacejoin(result) - - -# -# Pattern for finding cookie -# -# This used to be strict parsing based on the RFC2109 and RFC2068 -# specifications. I have since discovered that MSIE 3.0x doesn't -# follow the character rules outlined in those specs. As a -# result, the parsing rules here are less strict. -# - -_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" -_CookiePattern = re.compile(r""" - (?x) # This is a verbose pattern - (?P<key> # Start of group 'key' - """ + _LegalCharsPatt + r"""+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign - (?P<val> # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string - | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr - | # or - """ + _LegalCharsPatt + r"""* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII) # May be removed if safe. - - -# At long last, here is the cookie class. Using this class is almost just like -# using a dictionary. See this module's docstring for example usage. -# -class BaseCookie(dict): - """A container class for a set of Morsels.""" - - def value_decode(self, val): - """real_value, coded_value = value_decode(STRING) - Called prior to setting a cookie's value from the network - representation. The VALUE is the value read from HTTP - header. - Override this function to modify the behavior of cookies. - """ - return val, val - - def value_encode(self, val): - """real_value, coded_value = value_encode(VALUE) - Called prior to setting a cookie's value from the dictionary - representation. The VALUE is the value being assigned. - Override this function to modify the behavior of cookies. - """ - strval = str(val) - return strval, strval - - def __init__(self, input=None): - if input: - self.load(input) - - def __set(self, key, real_value, coded_value): - """Private method for setting a cookie's value""" - M = self.get(key, Morsel()) - M.set(key, real_value, coded_value) - dict.__setitem__(self, key, M) - - def __setitem__(self, key, value): - """Dictionary style assignment.""" - rval, cval = self.value_encode(value) - self.__set(key, rval, cval) - - def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): - """Return a string suitable for HTTP.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.output(attrs, header)) - return sep.join(result) - - __str__ = output - - @as_native_str() - def __repr__(self): - l = [] - items = sorted(self.items()) - for key, value in items: - if PY2 and isinstance(value.value, unicode): - val = str(value.value) # make it a newstr to remove the u prefix - else: - val = value.value - l.append('%s=%s' % (str(key), repr(val))) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) - - def js_output(self, attrs=None): - """Return a string suitable for JavaScript.""" - result = [] - items = sorted(self.items()) - for key, value in items: - result.append(value.js_output(attrs)) - return _nulljoin(result) - - def load(self, rawdata): - """Load cookies from a string (presumably HTTP_COOKIE) or - from a dictionary. Loading cookies from a dictionary 'd' - is equivalent to calling: - map(Cookie.__setitem__, d.keys(), d.values()) - """ - if isinstance(rawdata, str): - self.__parse_string(rawdata) - else: - # self.update() wouldn't call our custom __setitem__ - for key, value in rawdata.items(): - self[key] = value - return - - def __parse_string(self, mystr, patt=_CookiePattern): - i = 0 # Our starting point - n = len(mystr) # Length of string - M = None # current morsel - - while 0 <= i < n: - # Start looking for a cookie - match = patt.search(mystr, i) - if not match: - # No more cookies - break - - key, value = match.group("key"), match.group("val") - - i = match.end(0) - - # Parse the key, value in case it's metainfo - if key[0] == "$": - # We ignore attributes which pertain to the cookie - # mechanism as a whole. See RFC 2109. - # (Does anyone care?) - if M: - M[key[1:]] = value - elif key.lower() in Morsel._reserved: - if M: - if value is None: - if key.lower() in Morsel._flags: - M[key] = True - else: - M[key] = _unquote(value) - elif value is not None: - rval, cval = self.value_decode(value) - self.__set(key, rval, cval) - M = self[key] - - -class SimpleCookie(BaseCookie): - """ - SimpleCookie supports strings as cookie values. When setting - the value using the dictionary assignment notation, SimpleCookie - calls the builtin str() to convert the value to a string. Values - received from HTTP are kept as strings. - """ - def value_decode(self, val): - return _unquote(val), val - - def value_encode(self, val): - strval = str(val) - return strval, _quote(strval) diff --git a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/server.py b/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/server.py deleted file mode 100644 index b1c11e0..0000000 --- a/FOSS/Python/Dependencies/future-0.18.2/src/future/backports/http/server.py +++ /dev/null @@ -1,1226 +0,0 @@ -"""HTTP server classes. - -From Python 3.3 - -Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see -SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, -and CGIHTTPRequestHandler for CGI scripts. - -It does, however, optionally implement HTTP/1.1 persistent connections, -as of version 0.3. - -Notes on CGIHTTPRequestHandler ------------------------------- - -This class implements GET and POST requests to cgi-bin scripts. - -If the os.fork() function is not present (e.g. on Windows), -subprocess.Popen() is used as a fallback, with slightly altered semantics. - -In all cases, the implementation is intentionally naive -- all -requests are executed synchronously. - -SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL --- it may execute arbitrary Python code or external programs. - -Note that status code 200 is sent prior to execution of a CGI script, so -scripts cannot send other status codes such as 302 (redirect). - -XXX To do: - -- log requests even later (to capture byte count) -- log user-agent header and other interesting goodies -- send error log to separate file -""" - -from __future__ import (absolute_import, division, - print_function, unicode_literals) -from future import utils -from future.builtins import * - - -# See also: -# -# HTTP Working Group T. Berners-Lee -# INTERNET-DRAFT R. T. Fielding -# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen -# Expires September 8, 1995 March 8, 1995 -# -# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt -# -# and -# -# Network Working Group R. Fielding -# Request for Comments: 2616 et al -# Obsoletes: 2068 June 1999 -# Category: Standards Track -# -# URL: http://www.faqs.org/rfcs/rfc2616.html - -# Log files -# --------- -# -# Here's a quote from the NCSA httpd docs about log file format. -# -# | The logfile format is as follows. Each line consists of: -# | -# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb -# | -# | host: Either the DNS name or the IP number of the remote client -# | rfc931: Any information returned by identd for this person, -# | - otherwise. -# | authuser: If user sent a userid for authentication, the user name, -# | - otherwise. -# | DD: Day -# | Mon: Month (calendar name) -# | YYYY: Year -# | hh: hour (24-hour format, the machine's timezone) -# | mm: minutes -# | ss: seconds -# | request: The first line of the HTTP request as sent by the client. -# | ddd: the status code returned by the server, - if not available. -# | bbbb: the total number of bytes sent, -# | *not including the HTTP/1.0 header*, - if not available -# | -# | You can determine the name of the file accessed through request. -# -# (Actually, the latter is only true if you know the server configuration -# at the time the request was made!) - -__version__ = "0.6" - -__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] - -from future.backports import html -from future.backports.http import client as http_client -from future.backports.urllib import parse as urllib_parse -from future.backports import socketserver - -import io -import mimetypes -import os -import posixpath -import select -import shutil -import socket # For gethostbyaddr() -import sys -import time -import copy -import argparse - - -# Default error message template -DEFAULT_ERROR_MESSAGE = """\ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> - <head> - <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> - <title>Error response</title> - </head> - <body> - <h1>Error response</h1> - <p>Error code: %(code)d</p> - <p>Message: %(message)s.</p> - <p>Error code explanation: %(code)s - %(explain)s.</p> - </body> -</html> -""" - -DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" - -def _quote_html(html): - return html.replace("&", "&").replace("<", "<").replace(">", ">") - -class HTTPServer(socketserver.TCPServer): - - allow_reuse_address = 1 # Seems to make sense in testing environment - - def server_bind(self): - """Override server_bind to store the server name.""" - socketserver.TCPServer.server_bind(self) - host, port = self.socket.getsockname()[:2] - self.server_name = socket.getfqdn(host) - self.server_port = port - - -class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): - - """HTTP request handler base class. - - The following explanation of HTTP serves to guide you through the - code as well as to expose any misunderstandings I may have about - HTTP (so you don't need to read the code to figure out I'm wrong - :-). - - HTTP (HyperText Transfer Protocol) is an extensible protocol on - top of a reliable stream transport (e.g. TCP/IP). The protocol - recognizes three parts to a request: - - 1. One line identifying the request type and path - 2. An optional set of RFC-822-style headers - 3. An optional data part - - The headers and data are separated by a blank line. - - The first line of the request has the form - - <command> <path> <version> - - where <command> is a (case-sensitive) keyword such as GET or POST, - <path> is a string containing path information for the request, - and <version> should be the string "HTTP/1.0" or "HTTP/1.1". - <path> is encoded using the URL encoding scheme (using %xx to signify - the ASCII character with hex code xx). - - The specification specifies that lines are separated by CRLF but - for compatibility with the widest range of clients recommends - servers also handle LF. Similarly, whitespace in the request line - is treated sensibly (allowing multiple spaces between components - and allowing trailing whitespace). - - Similarly, for output, lines ought to be separated by CRLF pairs - but most clients grok LF characters just fine. - - If the first line of the request has the form - - <command> <path> - - (i.e. <version> is left out) then this is assumed to be an HTTP - 0.9 request; this form has no optional headers and data part and - the reply consists of just the data. - - The reply form of the HTTP 1.x protocol again has three parts: - - 1. One line giving the response code - 2. An optional set of RFC-822-style headers - 3. The data - - Again, the headers and data are separated by a blank line. - - The response code line has the form - - <version> <responsecode> <responsestring> - - where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), - <responsecode> is a 3-digit response code indicating success or - failure of the request, and <responsestring> is an optional - human-readable string explaining what the response code means. - - This server parses the request and the headers, and then calls a - function specific to the request type (<command>). Specifically, - a request SPAM will be handled by a method do_SPAM(). If no - such method exists the server sends an error response to the - client. If it exists, it is called with no arguments: - - do_SPAM() - - Note that the request name is case sensitive (i.e. SPAM and spam - are different requests). - - The various request details are stored in instance variables: - - - client_address is the client IP address in the form (host, - port); - - - command, path and version are the broken-down request line; - - - headers is an instance of email.message.Message (or a derived - class) containing the header information; - - - rfile is a file object open for reading positioned at the - start of the optional input data part; - - - wfile is a file object open for writing. - - IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! - - The first thing to be written must be the response line. Then - follow 0 or more header lines, then a blank line, and then the - actual data (if any). The meaning of the header lines depends on - the command executed by the server; in most cases, when data is - returned, there should be at least one header line of the form - - Content-type: <type>/<subtype> - - where <type> and <subtype> should be registered MIME types, - e.g. "text/html" or "text/plain". - - """ - - # The Python system version, truncated to its first component. - sys_version = "Python/" + sys.version.split()[0] - - # The server software version. You may want to override this. - # The format is multiple whitespace-separated strings, - # where each string is of the form name[/version]. - server_version = "BaseHTTP/" + __version__ - - error_message_format = DEFAULT_ERROR_MESSAGE - error_content_type = DEFAULT_ERROR_CONTENT_TYPE - - # The default request version. This only affects responses up until - # the point where the request line is parsed, so it mainly decides what - # the client gets back when sending a malformed request line. - # Most web servers default to HTTP 0.9, i.e. don't send a status line. - default_request_version = "HTTP/0.9" - - def parse_request(self): - """Parse a request (internal). - - The request should be stored in self.raw_requestline; the results - are in self.command, self.path, self.request_version and - self.headers. - - Return True for success, False for failure; on failure, an - error is sent back. - - """ - self.command = None # set in case of error on the first line - self.request_version = version = self.default_request_version - self.close_connection = 1 - requestline = str(self.raw_requestline, 'iso-8859-1') - requestline = requestline.rstrip('\r\n') - self.requestline = requestline - words = requestline.split() - if len(words) == 3: - command, path, version = words - if version[:5] != 'HTTP/': - self.send_error(400, "Bad request version (%r)" % version) - return False - try: - base_version_number = version.split('/', 1)[1] - version_number = base_version_number.split(".") - # RFC 2145 section 3.1 says there can be only one "." and - # - major and minor numbers MUST be treated as - # separate integers; - # - HTTP/2.4 is a lower version than HTTP/2.13, which in - # turn is lower than HTTP/12.3; - # - Leading zeros MUST be ignored by recipients. - if len(version_number) != 2: - raise ValueError - version_number = int(version_number[0]), int(version_number[1]) - except (ValueError, IndexError): - self.send_error(400, "Bad request version (%r)" % version) - return False - if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": - self.close_connection = 0 - if version_number >= (2, 0): - self.send_error(505, - "Invalid HTTP Version (%s)" % base_version_number) - return False - elif len(words) == 2: - command, path = words - self.close_connection = 1 - if command != 'GET': - self.send_error(400, - "Bad HTTP/0.9 request type (%r)" % command) - return False - elif not words: - return False - else: - self.send_error(400, "Bad request syntax (%r)" % requestline) - return False - self.command, self.path, self.request_version = command, path, version - - # Examine the headers and look for a Connection directive. - try: - self.headers = http_client.parse_headers(self.rfile, - _class=self.MessageClass) - except http_client.LineTooLong: - self.send_error(400, "Line too long") - return False - - conntype = self.headers.get('Connection', "") - if conntype.lower() == 'close': - self.close_connection = 1 - elif (conntype.lower() == 'keep-alive' and - self.protocol_version >= "HTTP/1.1"): - self.close_connection = 0 - # Examine the headers and look for an Expect directive - expect = self.headers.get('Expect', "") - if (expect.lower() == "100-continue" and - self.protocol_version >= "HTTP/1.1" and - self.request_version >= "HTTP/1.1"): - if not self.handle_expect_100(): - return False - return True - - def handle_expect_100(self): - """Decide what to do with an "Expect: 100-continue" header. - - If the client is expecting a 100 Continue response, we must - respond with either a 100 Continue or a final response before - waiting for the request body. The default is to always respond - with a 100 Continue. You can behave differently (for example, - reject unauthorized requests) by overriding this method. - - This method should either return True (possibly after sending - a 100 Continue response) or send an error response and return - False. - - """ - self.send_response_only(100) - self.flush_headers() - return True - - def handle_one_request(self): - """Handle a single HTTP request. - - You normally don't need to override this method; see the class - __doc__ string for information on how to handle specific HTTP - commands such as GET and POST. - - """ - try: - self.raw_requestline = self.rfile.readline(65537) - if len(self.raw_requestline) > 65536: - self.requestline = '' - self.request_version = '' - self.command = '' - self.send_error(414) - return - if not self.raw_requestline: - self.close_connection = 1 - return - if not self.parse_request(): - # An error code has been sent, just exit - return - mname = 'do_' + self.command - if not hasattr(self, mname): - self.send_error(501, "Unsupported method (%r)" % self.command) - return - method = getattr(self, mname) - method() - self.wfile.flush() #actually send the response if not already done. - except socket.timeout as e: - #a read or a write timed out. Discard this connection - self.log_error("Request timed out: %r", e) - self.close_connection = 1 - return - - def handle(self): - """Handle multiple requests if necessary.""" - self.close_connection = 1 - - self.handle_one_request() - while not self.close_connection: - self.handle_one_request() - - def send_error(self, code, message=None): - """Send and log an error reply. - - Arguments are the error code, and a detailed message. - The detailed message defaults to the short entry matching the - response code. - - This sends an error response (so it must be called before any - output has been generated), logs the error, and finally sends - a piece of HTML explaining the error to the user. - - """ - - try: - shortmsg, longmsg = self.responses[code] - except KeyError: - shortmsg, longmsg = '???', '???' - if message is None: - message = shortmsg - explain = longmsg - self.log_error("code %d, message %s", code, message) - # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) - content = (self.error_message_format % - {'code': code, 'message': _quote_html(message), 'explain': explain}) - self.send_response(code, message) - self.send_header("Content-Type", self.error_content_type) - self.send_header('Connection', 'close') - self.end_headers() - if self.command != 'HEAD' and code >= 200 and code not in (204, 304): - self.wfile.write(content.encode('UTF-8', 'replace')) - - def send_response(self, code, message=None): - """Add the response header to the headers buffer and log the - response code. - - Also send two standard headers with the server software - version and the current date. - - """ - self.log_request(code) - self.send_response_only(code, message) - self.send_header('Server', self.version_string()) - self.send_header('Date', self.date_time_string()) - - def send_response_only(self, code, message=None): - """Send the response header only.""" - if message is None: - if code in self.responses: - message = self.responses[code][0] - else: - message = '' - if self.request_version != 'HTTP/0.9': - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode( - 'latin-1', 'strict')) - - def send_header(self, keyword, value): - """Send a MIME header to the headers buffer.""" - if self.request_version != 'HTTP/0.9': - if not hasattr(self, '_headers_buffer'): - self._headers_buffer = [] - self._headers_buffer.append( - ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) - - if keyword.lower() == 'connection': - if value.lower() == 'close': - self.close_connection = 1 - elif value.lower() == 'keep-alive': - self.close_connection = 0 - - def end_headers(self): - """Send the blank line ending the MIME headers.""" - if self.request_version != 'HTTP/0.9': - self._headers_buffer.append(b"\r\n") - self.flush_headers() - - def flush_headers(self): - if hasattr(self, '_headers_buffer'): - self.wfile.write(b"".join(self._headers_buffer)) - self._headers_buffer = [] - - def log_request(self, code='-', size='-'): - """Log an accepted request. - - This is called by send_response(). - - """ - - self.log_message('"%s" %s %s', - self.requestline, str(code), str(size)) - - def log_error(self, format, *args): - """Log an error. - - This is called when a request cannot be fulfilled. By - default it passes the message on to log_message(). - - Arguments are the same as for log_message(). - - XXX This should go to the separate error log. - - """ - - self.log_message(format, *args) - - def log_message(self, format, *args): - """Log an arbitrary message. - - This is used by all other logging functions. Override - it if you have specific logging wishes. - - The first argument, FORMAT, is a format string for the - message to be logged. If the format string contains - any % escapes requiring parameters, they should be - specified as subsequent arguments (it's just like - printf!). - - The client ip and current date/time are prefixed to - every message. - - """ - - sys.stderr.write("%s - - [%s] %s\n" % - (self.address_string(), - self.log_date_time_string(), - format%args)) - - def version_string(self): - """Return the server software version string.""" - return self.server_version + ' ' + self.sys_version - - def date_time_string(self, timestamp=None): - """Return the current date and time formatted for a message header.""" - if timestamp is None: - timestamp = time.time() - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) - s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - self.weekdayname[wd], - day, self.monthname[month], year, - hh, mm, ss) - return s - - def log_date_time_string(self): - """Return the current time formatted for logging.""" - now = time.time() - year, month, day, hh, mm, ss, x, y, z = time.localtime(now) - s = "%02d/%3s/%04d %02d:%02d:%02d" % ( - day, self.monthname[month], year, hh, mm, ss) - return s - - weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - - monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - def address_string(self): - """Return the client address.""" - - return self.client_address[0] - - # Essentially static class variables - - # The version of the HTTP protocol we support. - # Set this to HTTP/1.1 to enable automatic keepalive - protocol_version = "HTTP/1.0" - - # MessageClass used to parse headers - MessageClass = http_client.HTTPMessage - - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. - # See RFC 2616 and 6585. - responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this resource.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - 428: ('Precondition Required', - 'The origin server requires the request to be conditional.'), - 429: ('Too Many Requests', 'The user has sent too many requests ' - 'in a given amount of time ("rate limiting").'), - 431: ('Request Header Fields Too Large', 'The server is unwilling to ' - 'process the request because its header fields are too large.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), - 511: ('Network Authentication Required', - 'The client needs to authenticate to gain network access.'), - } - - -class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): - - """Simple HTTP request handler with GET and HEAD commands. - - This serves files from the current directory and any of its - subdirectories. The MIME type for files is determined by - calling the .guess_type() method. - - The GET and HEAD requests are identical except that the HEAD - request omits the actual contents of the file. - - """ - - server_version = "SimpleHTTP/" + __version__ - - def do_GET(self): - """Serve a GET request.""" - f = self.send_head() - if f: - self.copyfile(f, self.wfile) - f.close() - - def do_HEAD(self): - """Serve a HEAD request.""" - f = self.send_head() - if f: - f.close() - - def send_head(self): - """Common code for GET and HEAD commands. - - This sends the response code and MIME headers. - - Return value is either a file object (which has to be copied - to the outputfile by the caller unless the command was HEAD, - and must be closed by the caller under all circumstances), or - None, in which case the caller has nothing further to do. - - """ - path = self.translate_path(self.path) - f = None - if os.path.isdir(path): - if not self.path.endswith('/'): - # redirect browser - doing basically what apache does - self.send_response(301) - self.send_header("Location", self.path + "/") - self.end_headers() - return None - for index in "index.html", "index.htm": - index = os.path.join(path, index) - if os.path.exists(index): - path = index - break - else: - return self.list_directory(path) - ctype = self.guess_type(path) - try: - f = open(path, 'rb') - except IOError: - self.send_error(404, "File not found") - return None - self.send_response(200) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - - def list_directory(self, path): - """Helper to produce a directory listing (absent index.html). - - Return value is either a file object, or None (indicating an - error). In either case, the headers are sent, making the - interface the same as for send_head(). - - """ - try: - list = os.listdir(path) - except os.error: - self.send_error(404, "No permission to list directory") - return None - list.sort(key=lambda a: a.lower()) - r = [] - displaypath = html.escape(urllib_parse.unquote(self.path)) - enc = sys.getfilesystemencoding() - title = 'Directory listing for %s' % displaypath - r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' - '"http://www.w3.org/TR/html4/strict.dtd">') - r.append('<html>\n<head>') - r.append('<meta http-equiv="Content-Type" ' - 'content="text/html; charset=%s">' % enc) - r.append('<title>%s</title>\n</head>' % title) - r.append('<body>\n<h1>%s</h1>' % title) - r.append('<hr>\n<ul>') - for name in list: - fullname = os.path.join(path, name) - displayname = linkname = name - # Append / for directories or @ for symbolic links - if os.path.isdir(fullname): - displayname = name + "/" - linkname = name + "/" - if os.path.islink(fullname): - displayname = name + "@" - # Note: a link to a directory displays with @ and links with / - r.append('<li><a href="%s">%s</a></li>' - % (urllib_parse.quote(linkname), html.escape(displayname))) - # # Use this instead: - # r.append('<li><a href="%s">%s</a></li>' - # % (urllib.quote(linkname), cgi.escape(displayname))) - r.append('</ul>\n<hr>\n</body>\n</html>\n') - encoded = '\n'.join(r).encode(enc) - f = io.BytesIO() - f.write(encoded) - f.seek(0) - self.send_response(200) - self.send_header("Content-type", "text/html; charset=%s" % enc) - self.send_header("Content-Length", str(len(encoded))) - self.end_headers() - return f - - def translate_path(self, path): - """Translate a /-separated PATH to the local filename syntax. - - Components that mean special things to the local file system - (e.g. drive or directory names) are ignored. (XXX They should - probably be diagnosed.) - - """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] - path = posixpath.normpath(urllib_parse.unquote(path)) - words = path.split('/') - words = filter(None, words) - path = os.getcwd() - for word in words: - drive, word = os.path.splitdrive(word) - head, word = os.path.split(word) - if word in (os.curdir, os.pardir): continue - path = os.path.join(path, word) - return path - - def copyfile(self, source, outputfile): - """Copy all data between two file objects. - - The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). - - The only reason for overriding this would be to change - the block size or perhaps to replace newlines by CRLF - -- note however that this the default server uses this - to copy binary data as well. - - """ - shutil.copyfileobj(source, outputfile) - - def guess_type(self, path): - """Guess the type of a file. - - Argument is a PATH (a filename). - - Return value is a string of the form type/subtype, - usable for a MIME Content-type header. - - The default implementation looks the file's extension - up in the table self.extensions_map, using application/octet-stream - as a default; however it would be permissible (if - slow) to look inside the data to make a better guess. - - """ - - base, ext = posixpath.splitext(path) - if ext in self.extensions_map: - return self.extensions_map[ext] - ext = ext.lower() - if ext in self.extensions_map: - return self.extensions_map[ext] - else: - return self.extensions_map[''] - - if not mimetypes.inited: - mimetypes.init() # try to read system mime.types - extensions_map = mimetypes.types_map.copy() - extensions_map.update({ - '': 'application/octet-stream', # Default - '.py': 'text/plain', - '.c': 'text/plain', - '.h': 'text/plain', - }) - - -# Utilities for CGIHTTPRequestHandler - -def _url_collapse_path(path): - """ - Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references and returns a colllapsed path. - - Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. - The utility of this function is limited to is_cgi method and helps - preventing some security attacks. - - Returns: A tuple of (head, tail) where tail is everything after the final / - and head is everything before it. Head will always start with a '/' and, - if it contains anything else, never have a trailing '/'. - - Raises: IndexError if too many '..' occur within the path. - - """ - # Similar to os.path.split(os.path.normpath(path)) but specific to URL - # path semantics rather than local operating system semantics. - path_parts = path.split('/') - head_parts = [] - for part in path_parts[:-1]: - if part == '..': - head_parts.pop() # IndexError if more '..' than prior parts - elif part and part != '.': - head_parts.append( part ) - if path_parts: - tail_part = path_parts.pop() - if tail_part: - if tail_part == '..': - head_parts.pop() - tail_part = '' - elif tail_part == '.': - tail_part = '' - else: - tail_part = '' - - splitpath = ('/' + '/'.join(head_parts), tail_part) - collapsed_path = "/".join(splitpath) - - return collapsed_path - - - -nobody = None - -def nobody_uid(): - """Internal routine to get nobody's uid""" - global nobody - if nobody: - return nobody - try: - import pwd - except ImportError: - return -1 - try: - nobody = pwd.getpwnam('nobody')[2] - except KeyError: - nobody = 1 + max(x[2] for x in pwd.getpwall()) - return nobody - - -def executable(path): - """Test for executable file.""" - return os.access(path, os.X_OK) - - -class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): - - """Complete HTTP server with GET, HEAD and POST commands. - - GET and HEAD also support running CGI scripts. - - The POST command is *only* implemented for CGI scripts. - - """ - - # Determine platform specifics - have_fork = hasattr(os, 'fork') - - # Make rfile unbuffered -- we need to read one line and then pass - # the rest to a subprocess, so we can't use buffered input. - rbufsize = 0 - - def do_POST(self): - """Serve a POST request. - - This is only implemented for CGI scripts. - - """ - - if self.is_cgi(): - self.run_cgi() - else: - self.send_error(501, "Can only POST to CGI scripts") - - def send_head(self): - """Version of send_head that support CGI scripts""" - if self.is_cgi(): - return self.run_cgi() - else: - return SimpleHTTPRequestHandler.send_head(self) - - def is_cgi(self): - """Test whether self.path corresponds to a CGI script. - - Returns True and updates the cgi_info attribute to the tuple - (dir, rest) if self.path requires running a CGI script. - Returns False otherwise. - - If any exception is raised, the caller should assume that - self.path was rejected as invalid and act accordingly. - - The default implementation tests whether the normalized url - path begins with one of the strings in self.cgi_directories - (and the next character is a '/' or the end of the string). - - """ - collapsed_path = _url_collapse_path(self.path) - dir_sep = collapsed_path.find('/', 1) - head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] - if head in self.cgi_directories: - self.cgi_info = head, tail - return True - return False - - - cgi_directories = ['/cgi-bin', '/htbin'] - - def is_executable(self, path): - """Test whether argument path is an executable file.""" - return executable(path) - - def is_python(self, path): - """Test whether argument path is a Python script.""" - head, tail = os.path.splitext(path) - return tail.lower() in (".py", ".pyw") - - def run_cgi(self): - """Execute a CGI script.""" - path = self.path - dir, rest = self.cgi_info - - i = path.find('/', len(dir) + 1) - while i >= 0: - nextdir = path[:i] - nextrest = path[i+1:] - - scriptdir = self.translate_path(nextdir) - if os.path.isdir(scriptdir): - dir, rest = nextdir, nextrest - i = path.find('/', len(dir) + 1) - else: - break - - # find an explicit query string, if present. - i = rest.rfind('?') - if i >= 0: - rest, query = rest[:i], rest[i+1:] - else: - query = '' - - # dissect the part after the directory name into a script name & - # a possible additional path, to be stored in PATH_INFO. - i = rest.find('/') - if i >= 0: - script, rest = rest[:i], rest[i:] - else: - script, rest = rest, '' - - scriptname = dir + '/' + script - scriptfile = self.translate_path(scriptname) - if not os.path.exists(scriptfile): - self.send_error(404, "No such CGI script (%r)" % scriptname) - return - if not os.path.isfile(scriptfile): - self.send_error(403, "CGI script is not a plain file (%r)" % - scriptname) - return - ispy = self.is_python(scriptname) - if self.have_fork or not ispy: - if not self.is_executable(scriptfile): - self.send_error(403, "CGI script is not executable (%r)" % - scriptname) - return - - # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html - # XXX Much of the following could be prepared ahead of time! - env = copy.deepcopy(os.environ) - env['SERVER_SOFTWARE'] = self.version_string() - env['SERVER_NAME'] = self.server.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PROTOCOL'] = self.protocol_version - env['SERVER_PORT'] = str(self.server.server_port) - env['REQUEST_METHOD'] = self.command - uqrest = urllib_parse.unquote(rest) - env['PATH_INFO'] = uqrest - env['PATH_TRANSLATED'] = self.translate_path(uqrest) - env['SCRIPT_NAME'] = scriptname - if query: - env['QUERY_STRING'] = query - env['REMOTE_ADDR'] = self.client_address[0] - authorization = self.headers.get("authorization") - if authorization: - authorization = authorization.split() - if len(authorization) == 2: - import base64, binascii - env['AUTH_TYPE'] = authorization[0] - if authorization[0].lower() == "basic": - try: - authorization = authorization[1].encode('ascii') - if utils.PY3: - # In Py3.3, was: - authorization = base64.decodebytes(authorization).\ - decode('ascii') - else: - # Backport to Py2.7: - authorization = base64.decodestring(authorization).\ - decode('ascii') - except (binascii.Error, UnicodeError): - pass - else: - authorization = authorization.split(':') - if len(authorization) == 2: - env['REMOTE_USER'] = authorization[0] - # XXX REMOTE_IDENT - if self.headers.get('content-type') is None: - env['CONTENT_TYPE'] = self.headers.get_content_type() - else: - env['CONTENT_TYPE'] = self.headers['content-type'] - length = self.headers.get('content-length') - if length: - env['CONTENT_LENGTH'] = length - referer = self.headers.get('referer') - if referer: - env['HTTP_REFERER'] = referer - accept = [] - for line in self.headers.getallmatchingheaders('accept'): - if line[:1] in "\t\n\r ": - accept.append(line.strip()) - else: - accept = accept + line[7:].split(',') - env['HTTP_ACCEPT'] = ','.join(accept) - ua = self.headers.get('user-agent') - if ua: - env['HTTP_USER_AGENT'] = ua - co = filter(None, self.headers.get_all('cookie', [])) - cookie_str = ', '.join(co) - if cookie_str: - env['HTTP_COOKIE'] = cookie_str - # XXX Other HTTP_* headers - # Since we're setting the env in the parent, provide empty - # values to override previously set values - for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', - 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): - env.setdefault(k, "") - - self.send_response(200, "Script output follows") - self.flush_headers() - - decoded_query = query.replace('+', ' ') - - if self.have_fork: - # Unix -- fork as we should - args = [script] - if '=' not in decoded_query: - args.append(decoded_query) - nobody = nobody_uid() - self.wfile.flush() # Always flush before forking - pid = os.fork() - if pid != 0: - # Parent - pid, sts = os.waitpid(pid, 0) - # throw away additional data [see bug #427345] - while select.select([self.rfile], [], [], 0)[0]: - if not self.rfile.read(1): - break - if sts: - self.log_error("CGI script exit status %#x", sts) - return - # Child - try: - try: - os.setuid(nobody) - except os.error: - pass - os.dup2(self.rfile.fileno(), 0) - os.dup2(self.wfile.fileno(), 1) - os.execve(scriptfile, args, env) - except: - self.server.handle_error(self.request, self.client_address) - os._exit(127) - - else: - # Non-Unix -- use subprocess - import subprocess - cmdline = [scriptfile] - if self.is_python(scriptfile): - interp = sys.executable - if interp.lower().endswith("w.exe"): - # On Windows, use python.exe, not pythonw.exe - interp = interp[:-5] + interp[-4:] - cmdline = [interp, '-u'] + cmdline - if '=' not in query: - cmdline.append(query) - self.log_message("command: %s", subprocess.list2cmdline(cmdline)) - try: - nbytes = int(length) - except (TypeError, ValueError): - nbytes = 0 - p = subprocess.Popen(cmdline, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env = env - ) - if self.command.lower() == "post" and nbytes > 0: - data = self.rfile.read(nbytes) - else: - data = None - # throw away additional data [see bug #427345] - while select.select([self.rfile._sock], [], [], 0)[0]: - if not self.rfile._sock.recv(1): - break - stdout, stderr = p.communicate(data) - self.wfile.write(stdout) - if stderr: - self.log_error('%s', stderr) - p.stderr.close() - p.stdout.close() - status = p.returncode - if status: - self.log_error("CGI script exit status %#x", status) - else: - self.log_message("CGI script exited OK") - - -def test(HandlerClass = BaseHTTPRequestHandler, - ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000): - """Test the HTTP request handler class. - - This runs an HTTP server on port 8000 (or the first command line - argument). - - """ - server_address = ('', port) - - HandlerClass.protocol_version = protocol - httpd = ServerClass(server_address, HandlerClass) - - sa = httpd.socket.getsockname() - print("Serving HTTP on", sa[0], "port", sa[1], "...") - try: - httpd.serve_forever() - except KeyboardInterrupt: - print("\nKeyboard interrupt received, exiting.") - httpd.server_close() - sys.exit(0) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--cgi', action='store_true', - help='Run as CGI Server') - parser.add_argument('port', action='store', - default=8000, type=int, - nargs='?', - help='Specify alternate port [default: 8000]') - args = parser.parse_args() - if args.cgi: - test(HandlerClass=CGIHTTPRequestHandler, port=args.port) - else: - test(HandlerClass=SimpleHTTPRequestHandler, port=args.port) |
