diff options
| author | Ellie Hermaszewska <ellieh@nvidia.com> | 2024-10-29 14:49:26 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-29 14:49:26 +0800 |
| commit | f65d756bff8d4c5cbc15bd0322a2ae8e6b896a21 (patch) | |
| tree | ea1d61342cd29368e19135000ec2948813096205 /source/core/slang-char-encode.cpp | |
| parent | a729c15e9dce9f5116a38afc66329ab2ca4cea54 (diff) | |
format
* format
* Minor test fixes
* enable checking cpp format in ci
Diffstat (limited to 'source/core/slang-char-encode.cpp')
| -rw-r--r-- | source/core/slang-char-encode.cpp | 192 |
1 file changed, 101 insertions, 91 deletions
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp index 526c6c923..a27e7ba82 100644 --- a/source/core/slang-char-encode.cpp +++ b/source/core/slang-char-encode.cpp @@ -3,20 +3,23 @@ namespace Slang { -class Utf8CharEncoding : public CharEncoding +class Utf8CharEncoding : public CharEncoding { public: typedef CharEncoding Super; - virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override - { + virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override + { ioBuffer.addRange((const Byte*)slice.begin(), slice.getLength()); - } - virtual void decode(const Byte* bytes, int length, List<char>& ioChars) override - { + } + virtual void decode(const Byte* bytes, int length, List<char>& ioChars) override + { ioChars.addRange((const char*)bytes, length); - } - Utf8CharEncoding() : Super(CharEncodeType::UTF8) {} + } + Utf8CharEncoding() + : Super(CharEncodeType::UTF8) + { + } }; class Utf32CharEncoding : public CharEncoding @@ -24,93 +27,98 @@ class Utf32CharEncoding : public CharEncoding public: typedef CharEncoding Super; - virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override - { - Index ptr = 0; - while (ptr < slice.getLength()) - { - const Char32 codePoint = getUnicodePointFromUTF8([&]() -> Byte - { - if (ptr < slice.getLength()) - return slice[ptr++]; - else - return '\0'; - }); + virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override + { + Index ptr = 0; + while (ptr < slice.getLength()) + { + const Char32 codePoint = getUnicodePointFromUTF8( + [&]() -> Byte + { + if (ptr < slice.getLength()) + return slice[ptr++]; + else + return '\0'; + }); // Note: Assumes byte order is same as arch byte order ioBuffer.addRange((const Byte*)&codePoint, 4); - } - } - virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override - { + } + } + virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override + { 
// Note: Assumes bytes is Char32 aligned SLANG_ASSERT((size_t(bytes) & 3) == 0); - const Char32* content = (const Char32*)bytes; - for (int i = 0; i < (length >> 2); i++) - { - char buf[5]; - int count = encodeUnicodePointToUTF8(content[i], buf); + const Char32* content = (const Char32*)bytes; + for (int i = 0; i < (length >> 2); i++) + { + char buf[5]; + int count = encodeUnicodePointToUTF8(content[i], buf); for (int j = 0; j < count; j++) ioBuffer.addRange(buf, count); - } - } + } + } - Utf32CharEncoding() : Super(CharEncodeType::UTF32) {} + Utf32CharEncoding() + : Super(CharEncodeType::UTF32) + { + } }; -class Utf16CharEncoding : public CharEncoding //UTF16 +class Utf16CharEncoding : public CharEncoding // UTF16 { public: typedef CharEncoding Super; - Utf16CharEncoding(bool reverseOrder): - Super(reverseOrder ? CharEncodeType::UTF16Reversed : CharEncodeType::UTF16), - m_reverseOrder(reverseOrder) - {} - virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override - { - Index index = 0; - while (index < slice.getLength()) - { - const Char32 codePoint = getUnicodePointFromUTF8([&]() -> Byte - { - if (index < slice.getLength()) - return slice[index++]; - else - return '\0'; - }); - - Char16 buffer[2]; - int count; - if (!m_reverseOrder) - count = encodeUnicodePointToUTF16(codePoint, buffer); - else - count = encodeUnicodePointToUTF16Reversed(codePoint, buffer); + Utf16CharEncoding(bool reverseOrder) + : Super(reverseOrder ? 
CharEncodeType::UTF16Reversed : CharEncodeType::UTF16) + , m_reverseOrder(reverseOrder) + { + } + virtual void encode(const UnownedStringSlice& slice, List<Byte>& ioBuffer) override + { + Index index = 0; + while (index < slice.getLength()) + { + const Char32 codePoint = getUnicodePointFromUTF8( + [&]() -> Byte + { + if (index < slice.getLength()) + return slice[index++]; + else + return '\0'; + }); + + Char16 buffer[2]; + int count; + if (!m_reverseOrder) + count = encodeUnicodePointToUTF16(codePoint, buffer); + else + count = encodeUnicodePointToUTF16Reversed(codePoint, buffer); ioBuffer.addRange((const Byte*)buffer, count * 2); - } - } - virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override - { - Index index = 0; - while (index < length) - { - auto readByte = [&]() -> Byte - { - return (index < length) ? bytes[index++] : Byte(0); - }; - const Char32 codePoint = m_reverseOrder ? - getUnicodePointFromUTF16Reversed(readByte) : - getUnicodePointFromUTF16(readByte); - - char buf[5]; - int count = encodeUnicodePointToUTF8(codePoint, buf); - ioBuffer.addRange((const char*)buf, count); - } - } + } + } + virtual void decode(const Byte* bytes, int length, List<char>& ioBuffer) override + { + Index index = 0; + while (index < length) + { + auto readByte = [&]() -> Byte { return (index < length) ? bytes[index++] : Byte(0); }; + const Char32 codePoint = m_reverseOrder ? 
getUnicodePointFromUTF16Reversed(readByte) + : getUnicodePointFromUTF16(readByte); + + char buf[5]; + int count = encodeUnicodePointToUTF8(codePoint, buf); + ioBuffer.addRange((const char*)buf, count); + } + } private: bool m_reverseOrder = false; }; -/* static */CharEncodeType CharEncoding::determineEncoding(const Byte* bytes, size_t bytesCount, size_t& outOffset) +/* static */ CharEncodeType CharEncoding::determineEncoding( + const Byte* bytes, + size_t bytesCount, + size_t& outOffset) { // TODO(JS): Assumes the bytes are suitably aligned @@ -137,7 +145,7 @@ private: // If we don't have a 'mark' byte then we are bit stumped. We'll look for // null (non-terminator) bytes and assume they mean we have a 16-bit encoding - for(size_t i = 0; i < (bytesCount-1); i += 2) + for (size_t i = 0; i < (bytesCount - 1); i += 2) { #if SLANG_LITTLE_ENDIAN const auto low = bytes[i]; @@ -164,41 +172,42 @@ static Utf16CharEncoding _utf16Encoding(false); static Utf16CharEncoding _utf16EncodingReversed(true); static Utf32CharEncoding _utf32Encoding; -/* static */CharEncoding* const CharEncoding::g_encoding[Index(CharEncodeType::CountOf)] -{ - &_utf8Encoding, // UTF8, - &_utf16Encoding, // UTF16, - &_utf16EncodingReversed, // UTF16Reversed, - &_utf32Encoding, // UTF32, +/* static */ CharEncoding* const CharEncoding::g_encoding[Index(CharEncodeType::CountOf)]{ + &_utf8Encoding, // UTF8, + &_utf16Encoding, // UTF16, + &_utf16EncodingReversed, // UTF16Reversed, + &_utf32Encoding, // UTF32, }; CharEncoding* CharEncoding::UTF8 = &_utf8Encoding; CharEncoding* CharEncoding::UTF16 = &_utf16Encoding; CharEncoding* CharEncoding::UTF16Reversed = &_utf16EncodingReversed; CharEncoding* CharEncoding::UTF32 = &_utf32Encoding; - + /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! UTF8Util !!!!!!!!!!!!!!!!!!!!!!!!! */ -/* static */Index UTF8Util::calcCodePointCount(const UnownedStringSlice& in) +/* static */ Index UTF8Util::calcCodePointCount(const UnownedStringSlice& in) { Index count = 0; // Analyse with bytes... 
const int8_t* cur = (const int8_t*)in.begin(); - const int8_t*const end = (const int8_t*)in.end(); + const int8_t* const end = (const int8_t*)in.end(); while (cur < end) { const auto c = *cur++; - + count++; // If c < 0 it means the top bit is set... which means we have multiple bytes if (c < 0) { // https://en.wikipedia.org/wiki/UTF-8 - // All continuation bytes contain exactly six bits from the code point.So the next six bits of the code point - /// are stored in the low order six bits of the next byte, and 10 is stored in the high order two bits to + // All continuation bytes contain exactly six bits from the code point.So the next six + // bits of the code point + /// are stored in the low order six bits of the next byte, and 10 is stored in the high + /// order two bits to // mark it as a continuation byte(so 10000010). while (cur < end && (*cur & 0xc0) == 0x80) @@ -217,7 +226,8 @@ Index UTF8Util::calcUTF16CharCount(const UnownedStringSlice& in) Index readPtr = 0; for (;;) { - int c = getUnicodePointFromUTF8([&]() -> Byte + int c = getUnicodePointFromUTF8( + [&]() -> Byte { if (readPtr < in.getLength()) return in[readPtr++]; |
