Removing exceptions from core/compiler-core (#1953)

* #include an absolute path didn't work - because paths were taken to always be relative.
* Refactor Stream. Working on all tests.
* Split out CharEncode.
* Make method names lower camel. m_prefix in Writer/Reader
* Tidy up around CharEncode interface.
* Small improvements around encode/decode.
* Better use of types.
* Remove readLine from TextReader.
* Remove exceptions from Stream/Text handling.
* Fix some typos.
* Fix tabbing.
* Fix missing override.
* Remove remaining exception throw/catch via using signal mechanism.
* Remove exceptions that are not used anymore.
* Document the Stream interface.
* Remove index for decoding 'get byte' function.
* Fix CharReader -> ByteReader.
author: jsmall-nvidia <jsmall@nvidia.com> 2021-10-04 14:15:51 -0400
committer: GitHub <noreply@github.com> 2021-10-04 14:15:51 -0400
commit: 97bb82ebcdf8f1391b9d93b5a8d7b1dfc4e88e52 (patch)
tree: f120ba282cbea96d23ed179737984a4610d3b520 /source/core/slang-char-encode.cpp
parent: b3dfe383c6d31ff3dbd76dcfb32de8d536382f3e (diff)
1 files changed, 181 insertions, 0 deletions
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp
new file mode 100644
index 000000000..d061e34ba
--- /dev/null
+++ b/source/core/slang-char-encode.cpp
@@ -0,0 +1,181 @@
+#include "slang-char-encode.h"
+
+namespace Slang
+{
+
+/// Encoding registered as CharEncodeType::UTF8. Both directions copy the bytes across unchanged.
+class Utf8CharEncoding : public CharEncoding
+{
+public:
+    using Super = CharEncoding;
+
+    Utf8CharEncoding() : Super(CharEncodeType::UTF8) {}
+
+    /// Appends the contents of `slice`, as is, to the end of `ioBuffer`.
+    virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override
+    {
+        const Byte* const src = (const Byte*)slice.begin();
+        ioBuffer.addRange(src, slice.getLength());
+    }
+
+    /// Appends the `length` bytes starting at `bytes`, as is, to the end of `ioChars`.
+    virtual void decode(const Byte* bytes, int length, List<char>& ioChars) override
+    {
+        const char* const src = (const char*)bytes;
+        ioChars.addRange(src, length);
+    }
+};
+
+/// Encoding registered as CharEncodeType::UTF32: every code point is held as one 4 byte Char32.
+class Utf32CharEncoding : public CharEncoding
+{
+public:
+    typedef CharEncoding Super;
+
+    /// Decodes the UTF8 in `slice` a code point at a time, appending each one to `ioBuffer` as the
+    /// 4 bytes of a Char32.
+    virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override
+    {
+        Index ptr = 0;
+        while (ptr < slice.getLength())
+        {
+            const Char32 codePoint = getUnicodePointFromUTF8([&]() -> Byte
+            {
+                // Hand back 0 if the decoder asks for bytes past the end of the slice
+                if (ptr < slice.getLength())
+                    return slice[ptr++];
+                else
+                    return '\0';
+            });
+            // Note: Assumes byte order is same as arch byte order
+            ioBuffer.addRange((const Byte*)&codePoint, 4);
+        }
+    }
+
+    /// Reads `bytes` as an array of Char32 and appends the UTF8 encoding of each element to
+    /// `ioBuffer`.
+    ///
+    /// `length` is in bytes. Any trailing bytes that do not make up a whole Char32 are ignored.
+    virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override
+    {
+        // Note: Assumes bytes is Char32 aligned
+        SLANG_ASSERT((size_t(bytes) & 3) == 0);
+        const Char32* content = (const Char32*)bytes;
+        for (int i = 0; i < (length >> 2); i++)
+        {
+            char buf[5];
+            const int count = encodeUnicodePointToUTF8(content[i], buf);
+            // Append the encoded bytes exactly once. (Appending inside a loop over `count` would
+            // repeat every code point that takes more than one UTF8 byte.)
+            ioBuffer.addRange(buf, count);
+        }
+    }
+
+    Utf32CharEncoding() : Super(CharEncodeType::UTF32) {}
+};
+
+/// Encoding for both 16 bit flavors, CharEncodeType::UTF16 and CharEncodeType::UTF16Reversed.
+/// Which of the two an instance handles is fixed when it is constructed.
+class Utf16CharEncoding : public CharEncoding //UTF16
+{
+public:
+    typedef CharEncoding Super;
+
+    /// @param reverseOrder If true the instance handles CharEncodeType::UTF16Reversed, otherwise
+    /// CharEncodeType::UTF16.
+    Utf16CharEncoding(bool reverseOrder):
+        Super(reverseOrder ? CharEncodeType::UTF16Reversed : CharEncodeType::UTF16),
+        m_reverseOrder(reverseOrder)
+    {}
+
+    /// Decodes the UTF8 in `slice` a code point at a time, appending each one to `ioBuffer` as the
+    /// bytes of the (at most two) Char16 units produced by the encode helper for this flavor.
+    virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override
+    {
+        Index index = 0;
+        while (index < slice.getLength())
+        {
+            const Char32 codePoint = getUnicodePointFromUTF8([&]() -> Byte
+            {
+                // Hand back 0 if the decoder asks for bytes past the end of the slice
+                if (index < slice.getLength())
+                    return slice[index++];
+                else
+                    return '\0';
+            });
+
+            Char16 buffer[2];
+            int count;
+            if (!m_reverseOrder)
+                count = encodeUnicodePointToUTF16(codePoint, buffer);
+            else
+                count = encodeUnicodePointToUTF16Reversed(codePoint, buffer);
+            // count is in Char16 units, addRange takes bytes
+            ioBuffer.addRange((const Byte*)buffer, count * 2);
+        }
+    }
+
+    /// Decodes the `length` bytes at `bytes` a code point at a time, appending the UTF8 encoding
+    /// of each one to `ioBuffer`.
+    ///
+    /// For the reversed flavor the two bytes of every 16 bit unit are exchanged as they are read,
+    /// so that what encode() produced for this instance decodes back to the same text.
+    /// NOTE(review): this assumes the reversed flavor is the regular one with the bytes of each
+    /// 16 bit unit swapped, as the encodeUnicodePointToUTF16Reversed / kUTF16ReversedHeader names
+    /// suggest - confirm against the helpers in the header.
+    virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override
+    {
+        // XOR-ing a read position with 1 selects the other byte of the same 2 byte pair
+        const Index pairSwap = m_reverseOrder ? 1 : 0;
+
+        Index index = 0;
+        while (index < length)
+        {
+            const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte
+            {
+                // Hand back 0 if the decoder asks for bytes past the end of the input
+                if (index >= length)
+                    return Byte(0);
+
+                const Index src = index ^ pairSwap;
+                index++;
+                // With an odd length the partner of the last byte doesn't exist
+                return (src < length) ? bytes[src] : Byte(0);
+            });
+
+            char buf[5];
+            int count = encodeUnicodePointToUTF8(codePoint, buf);
+            ioBuffer.addRange((const char*)buf, count);
+        }
+    }
+
+private:
+    bool m_reverseOrder = false;
+};
+
+/// Works out which encoding the `bytesCount` bytes starting at `bytes` use.
+///
+/// A byte order mark at the start decides the encoding if there is one. Without a mark the bytes
+/// are scanned as 2 byte pairs, and the first pair holding exactly one zero byte is taken to mean
+/// a 16 bit encoding. If that finds nothing, UTF8 is assumed.
+///
+/// @param bytes Start of the bytes to examine. Only read, and only within `bytesCount`.
+/// @param bytesCount Number of bytes available at `bytes`. Can be 0.
+/// @param outOffset Set on every path: the size in bytes of the mark that was found, or 0 if
+/// there was no mark.
+/// @return The detected encoding. CharEncodeType::UTF32 is never returned.
+/* static */CharEncodeType CharEncoding::determineEncoding(const Byte* bytes, size_t bytesCount, size_t& outOffset)
+{
+    // UTF8 mark
+    if (bytesCount >= 3 && bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf)
+    {
+        outOffset = 3;
+        return CharEncodeType::UTF8;
+    }
+
+    // 16 bit marks
+    if (bytesCount >= 2)
+    {
+        // Copy the first two bytes out, so `bytes` doesn't have to be Char16 aligned
+        Char16 c;
+        ::memcpy(&c, bytes, 2);
+
+        if (c == kUTF16Header)
+        {
+            outOffset = 2;
+            return CharEncodeType::UTF16;
+        }
+        if (c == kUTF16ReversedHeader)
+        {
+            outOffset = 2;
+            return CharEncodeType::UTF16Reversed;
+        }
+    }
+
+    // If we don't have a 'mark' then we are a bit stumped. We'll look for null bytes and assume
+    // they mean we have a 16 bit encoding.
+    // The `i + 1` bound keeps both reads of a pair inside the buffer when bytesCount is odd.
+    for (size_t i = 0; i + 1 < bytesCount; i += 2)
+    {
+#if SLANG_LITTLE_ENDIAN
+        const auto low = bytes[i];
+        const auto high = bytes[i + 1];
+#else
+        const auto low = bytes[i + 1];
+        const auto high = bytes[i];
+#endif
+        // Exactly one of the two bytes is zero
+        if ((low == 0) != (high == 0))
+        {
+            // No mark was found, so there is nothing at the start to skip
+            outOffset = 0;
+            return (high == 0) ? CharEncodeType::UTF16 : CharEncodeType::UTF16Reversed;
+        }
+    }
+
+    // Assume it's UTF8 or 7 bit ascii which UTF8 is a superset of
+    outOffset = 0;
+    return CharEncodeType::UTF8;
+}
+
+// The one shared instance of each encoding. These are file-local; they are exposed through the
+// CharEncoding static members defined below.
+// The bool given to the two Utf16CharEncoding instances is the constructor's `reverseOrder`.
+static Utf8CharEncoding _utf8Encoding;
+static Utf16CharEncoding _utf16Encoding(false);
+static Utf16CharEncoding _utf16EncodingReversed(true);
+static Utf32CharEncoding _utf32Encoding;
+
+// Table of the encoding instances, sized by CharEncodeType::CountOf.
+// NOTE(review): the entry order presumably has to match the enumerator order of CharEncodeType,
+// which is declared outside this file - confirm before adding or reordering entries.
+/* static */CharEncoding* const CharEncoding::g_encoding[Index(CharEncodeType::CountOf)]
+{
+    &_utf8Encoding,             // UTF8,
+    &_utf16Encoding,            // UTF16,
+    &_utf16EncodingReversed,    // UTF16Reversed,
+    &_utf32Encoding,            // UTF32,
+};
+
+// Named access to the same instances as held in g_encoding.
+CharEncoding* CharEncoding::UTF8 = &_utf8Encoding;
+CharEncoding* CharEncoding::UTF16 = &_utf16Encoding;
+CharEncoding* CharEncoding::UTF16Reversed = &_utf16EncodingReversed;
+CharEncoding* CharEncoding::UTF32 = &_utf32Encoding;
+	
+} // namespace Slang
author	jsmall-nvidia <jsmall@nvidia.com>	2021-10-04 14:15:51 -0400
committer	GitHub <noreply@github.com>	2021-10-04 14:15:51 -0400
commit	97bb82ebcdf8f1391b9d93b5a8d7b1dfc4e88e52 (patch)
tree	f120ba282cbea96d23ed179737984a4610d3b520 /source/core/slang-char-encode.cpp
parent	b3dfe383c6d31ff3dbd76dcfb32de8d536382f3e (diff)