From 4d836989720523cd0363927e3e066f56b9dc445c Mon Sep 17 00:00:00 2001
From: yum
Date: Sat, 17 Dec 2022 17:26:16 -0800
Subject: Check in `future` package
I hit some issues installing Whisper and had to embed this package.
I haven't taken the time to deeply understand what's going on. I think
that embedded Python follows different rules about resolving module
paths than regular system Python.
Basically, `future`'s setup.py has a line like `import src`, where
`src` is a module inside future (like `future/src/__init__.py`). This
doesn't work unless we put that directory on the search path.
---
.../tests/test_future/test_htmlparser.py | 764 +++++++++++++++++++++
1 file changed, 764 insertions(+)
create mode 100644 Python/Dependencies/future-0.18.2/tests/test_future/test_htmlparser.py
(limited to 'Python/Dependencies/future-0.18.2/tests/test_future/test_htmlparser.py')
diff --git a/Python/Dependencies/future-0.18.2/tests/test_future/test_htmlparser.py b/Python/Dependencies/future-0.18.2/tests/test_future/test_htmlparser.py
new file mode 100644
index 0000000..7a745ac
--- /dev/null
+++ b/Python/Dependencies/future-0.18.2/tests/test_future/test_htmlparser.py
@@ -0,0 +1,764 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for the html.parser functions.
+
+Adapted for the python-future module from the Python 3.3 standard library
+tests.
+"""
+
+from __future__ import (absolute_import, print_function, unicode_literals)
+from future import standard_library, utils
+from future.builtins import *
+
+from future.backports.test import support
+import future.backports.html.parser as html_parser
+
+import pprint
+from future.tests.base import unittest
+import sys
+
+# print(html_parser.__doc__, file=sys.stderr)
+
+
+class EventCollector(html_parser.HTMLParser):
+    """HTMLParser subclass that records each handler callback as a tuple.
+
+    Every ``handle_*`` method appends ``(event_name, payload...)`` to
+    ``self.events`` so that a test can compare the recorded sequence with
+    an expected list of events.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # The event list and its bound ``append`` are set up before the
+        # base-class initialiser is called.
+        self.events = []
+        self.append = self.events.append
+        html_parser.HTMLParser.__init__(self, *args, **kwargs)
+
+    def get_events(self):
+        """Return the recorded events with adjacent "data" events merged.
+
+        Buffer artefacts can split one run of contiguous characters into
+        several "data" events; consecutive ones are concatenated here.
+        The merged list is stored back on ``self.events`` and returned.
+        Note that ``self.append`` stays bound to the list created in
+        ``__init__``, not to the merged list.
+        """
+        merged = []
+        for event in self.events:
+            kind = event[0]
+            if kind == "data" and merged and merged[-1][0] == "data":
+                # Extend the previous data event instead of adding a new one.
+                merged[-1] = ("data", merged[-1][1] + event[1])
+                continue
+            merged.append(event)
+        self.events = merged
+        return merged
+
+    # structure markup
+
+    def handle_starttag(self, tag, attrs):
+        record = ("starttag", tag, attrs)
+        self.append(record)
+
+    def handle_startendtag(self, tag, attrs):
+        record = ("startendtag", tag, attrs)
+        self.append(record)
+
+    def handle_endtag(self, tag):
+        record = ("endtag", tag)
+        self.append(record)
+
+    # all other markup
+
+    def handle_comment(self, data):
+        record = ("comment", data)
+        self.append(record)
+
+    def handle_charref(self, data):
+        record = ("charref", data)
+        self.append(record)
+
+    def handle_data(self, data):
+        record = ("data", data)
+        self.append(record)
+
+    def handle_decl(self, data):
+        record = ("decl", data)
+        self.append(record)
+
+    def handle_entityref(self, data):
+        record = ("entityref", data)
+        self.append(record)
+
+    def handle_pi(self, data):
+        record = ("pi", data)
+        self.append(record)
+
+    def unknown_decl(self, decl):
+        record = ("unknown decl", decl)
+        self.append(record)
+
+
+class EventCollectorExtra(EventCollector):
+    """EventCollector that also records the text of each start tag.
+
+    After the regular "starttag" event, a ("starttag_text", text) event is
+    appended, where text is whatever ``get_starttag_text()`` returns.
+    """
+
+    def handle_starttag(self, tag, attrs):
+        EventCollector.handle_starttag(self, tag, attrs)
+        starttag_text = self.get_starttag_text()
+        self.append(("starttag_text", starttag_text))
+
+
+class TestCaseBase(unittest.TestCase):
+    """Shared helpers for the parser test cases.
+
+    Subclasses provide ``get_collector()``, which must return a fresh
+    event-collecting parser.
+    """
+
+    def get_collector(self):
+        """Return a new collector; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    def _run_check(self, source, expected_events, collector=None):
+        """Feed *source* to a collector and compare the recorded events.
+
+        *source* is iterated and each item is fed separately, so a plain
+        string is fed one character at a time while a list of strings is
+        fed chunk by chunk. When *collector* is None a new one is obtained
+        from ``get_collector()``. The test fails with a pretty-printed
+        diff-style message if the events differ from *expected_events*.
+        """
+        parser = self.get_collector() if collector is None else collector
+        for chunk in source:
+            parser.feed(chunk)
+        parser.close()
+        events = parser.get_events()
+        if events != expected_events:
+            message = ("received events did not match expected events\n"
+                       "Expected:\n" + pprint.pformat(expected_events) +
+                       "\nReceived:\n" + pprint.pformat(events))
+            self.fail(message)
+
+    def _run_check_extra(self, source, events):
+        """Run ``_run_check`` using a fresh ``EventCollectorExtra``."""
+        extra_collector = EventCollectorExtra()
+        self._run_check(source, events, extra_collector)
+
+    def _parse_error(self, source):
+        """Assert that parsing *source* raises ``HTMLParseError``."""
+        def feed_and_close():
+            # The whole source is fed in one call here, not item by item.
+            parser = self.get_collector()
+            parser.feed(source)
+            parser.close()
+        self.assertRaises(html_parser.HTMLParseError, feed_and_close)
+
+
+class HTMLParserStrictTestCase(TestCaseBase):
+
+ def get_collector(self):
+     """Return an ``EventCollector`` constructed with ``strict=True``.
+
+     Construction happens inside ``support.check_warnings`` filtered on
+     ``DeprecationWarning`` with ``quiet=False``.
+     """
+     warning_filter = ("", DeprecationWarning)
+     with support.check_warnings(warning_filter, quiet=False):
+         collector = EventCollector(strict=True)
+     return collector
+
+ def test_processing_instruction_only(self):
+ self._run_check("", [
+ ("pi", "processing instruction"),
+ ])
+ self._run_check("", [
+ ("pi", "processing instruction ?"),
+ ])
+
+ def test_simple_html(self):
+ self._run_check("""
+
+&entity;
+
+
sample
+text
+“
+
+
+""", [
+ ("data", "\n"),
+ ("decl", "DOCTYPE html PUBLIC 'foo'"),
+ ("data", "\n"),
+ ("starttag", "html", []),
+ ("entityref", "entity"),
+ ("charref", "32"),
+ ("data", "\n"),
+ ("comment", "comment1a\n-><bad;
", [
+ ("starttag", "p", []),
+ ("data", "bad;"),
+ ("endtag", "p"),
+ ])
+
+ def test_unclosed_entityref(self):
+     """An entity reference without a closing ';' is still reported."""
+     expected = [
+         ("entityref", "entityref"),
+         ("data", " foo"),
+     ]
+     self._run_check("&entityref foo", expected)
+
+ def test_bad_nesting(self):
+ # Strangely, this *is* supposed to test that overlapping
+ # elements are allowed. HTMLParser is more geared toward
+ # lexing the input than parsing the structure.
+ # NOTE(review): the source string below is empty although four tag
+ # events are expected; the markup was presumably lost when this
+ # patch was extracted -- confirm against the upstream test file.
+ self._run_check("", [
+ ("starttag", "a", []),
+ ("starttag", "b", []),
+ ("endtag", "a"),
+ ("endtag", "b"),
+ ])
+
+ def test_bare_ampersands(self):
+     """Ampersands that do not start a reference are reported as data."""
+     text = "this text & contains & ampersands &"
+     self._run_check(text, [("data", text)])
+
+ def test_bare_pointy_brackets(self):
+     """Angle brackets that do not form a tag are reported as data."""
+     text = "this < text > contains < bare>pointy< brackets"
+     self._run_check(text, [("data", text)])
+
+ def test_illegal_declarations(self):
+ self._parse_error('')
+
+ def test_starttag_end_boundary(self):
+ self._run_check("""""", [("starttag", "a", [("b", "<")])])
+ self._run_check("""""", [("starttag", "a", [("b", ">")])])
+
+ def test_buffer_artefacts(self):
+ output = [("starttag", "a", [("b", "<")])]
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+
+ output = [("starttag", "a", [("b", ">")])]
+ self._run_check([""], output)
+ self._run_check(["'>"], output)
+ self._run_check(["'>"], output)
+ self._run_check(["'>"], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+
+ output = [("comment", "abc")]
+ self._run_check(["", ""], output)
+ self._run_check(["<", "!--abc-->"], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check([""], output)
+ self._run_check(["", ""], output)
+
+ def test_starttag_junk_chars(self):
+ self._parse_error(">")
+ self._parse_error("$>")
+ self._parse_error("")
+ self._parse_error("")
+ self._parse_error("")
+ self._parse_error("'")
+ self._parse_error("" % dtd,
+ [('decl', 'DOCTYPE ' + dtd)])
+
+ def test_declaration_junk_chars(self):
+ self._parse_error("")
+
+ def test_startendtag(self):
+ self._run_check("", [
+ ("startendtag", "p", []),
+ ])
+ self._run_check("", [
+ ("starttag", "p", []),
+ ("endtag", "p"),
+ ])
+ self._run_check("
", [
+ ("starttag", "p", []),
+ ("startendtag", "img", [("src", "foo")]),
+ ("endtag", "p"),
+ ])
+
+ def test_get_starttag_text(self):
+ s = """"""
+ self._run_check_extra(s, [
+ ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
+ ("starttag_text", s)])
+
+ def test_cdata_content(self):
+ contents = [
+ ' ¬-an-entity-ref;',
+ "",
+ ' ',
+ 'foo = "";',
+ 'foo = "";',
+ 'foo = <\n/script> ',
+ '',
+ ('\n//<\\/s\'+\'cript>\');\n//]]>'),
+ '\n\n',
+ 'foo = "";',
+ '',
+ # these two should be invalid according to the HTML 5 spec,
+ # section 8.1.2.2
+ #'foo = \nscript>',
+ #'foo = script>',
+ ]
+ elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style']
+ for content in contents:
+ for element in elements:
+ element_lower = element.lower()
+ s = '<{element}>{content}{element}>'.format(element=element,
+ content=content)
+ self._run_check(s, [("starttag", element_lower, []),
+ ("data", content),
+ ("endtag", element_lower)])
+
+ def test_cdata_with_closing_tags(self):
+ # see issue #13358
+ # make sure that HTMLParser calls handle_data only once for each CDATA.
+ # The normal event collector normalizes the events in get_events,
+ # so we override it to return the original list of events.
+ class Collector(EventCollector):
+ def get_events(self):
+ return self.events
+
+ content = """ ¬-an-entity-ref;
+
+ ''"""
+ for element in [' script', 'script ', ' script ',
+ '\nscript', 'script\n', '\nscript\n']:
+ element_lower = element.lower().strip()
+ s = '