diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/compiler-core/slang-json-lexer.cpp | 2 | ||||
| -rw-r--r-- | source/compiler-core/slang-source-loc.cpp | 31 | ||||
| -rw-r--r-- | source/core/slang-char-encode.cpp | 25 |
3 files changed, 38 insertions, 20 deletions
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp index 0476ca37a..a335403e1 100644 --- a/source/compiler-core/slang-json-lexer.cpp +++ b/source/compiler-core/slang-json-lexer.cpp @@ -238,7 +238,7 @@ JSONTokenType JSONLexer::advance() StringBuilder buf; if (c <= ' ' || c >= 0x7e) { - static const char s_hex[] = "012345679abcdef"; + static const char s_hex[] = "0123456789abcdef"; char hexBuf[5] = "0x"; diff --git a/source/compiler-core/slang-source-loc.cpp b/source/compiler-core/slang-source-loc.cpp index 872c40f0d..75601b815 100644 --- a/source/compiler-core/slang-source-loc.cpp +++ b/source/compiler-core/slang-source-loc.cpp @@ -573,15 +573,34 @@ int SourceFile::calcColumnIndex(int lineIndex, int offset, int tabSize) void SourceFile::setContents(ISlangBlob* blob) { - const UInt contentSize = blob->getBufferSize(); + const UInt rawContentSize = blob->getBufferSize(); - SLANG_ASSERT(contentSize == m_contentSize); + SLANG_ASSERT(rawContentSize == m_contentSize); - char const* contentBegin = (char const*)blob->getBufferPointer(); - char const* contentEnd = contentBegin + contentSize; + Byte* rawContentBegin = (Byte*)blob->getBufferPointer(); - m_contentBlob = blob; - m_content = UnownedStringSlice(contentBegin, contentEnd); + // Query the encoding type and discard the Unicode Byte-Order-Marker before decoding + size_t offset; + auto type = CharEncoding::determineEncoding( + rawContentBegin, + rawContentSize, + offset); + SLANG_ASSERT(rawContentSize >= offset); + + List<char> decodedBuffer; + CharEncoding::getEncoding(type)->decode( + rawContentBegin + offset, + int(rawContentSize - offset), + decodedBuffer); + + m_contentBlob = RawBlob::create(decodedBuffer.getBuffer(), decodedBuffer.getCount()); + + char const* decodedContentBegin = (char const*)m_contentBlob->getBufferPointer(); + const UInt decodedContentSize = m_contentBlob->getBufferSize(); + assert(decodedContentSize <= rawContentSize); + char const* decodedContentEnd = decodedContentBegin + decodedContentSize; + + m_content = UnownedStringSlice(decodedContentBegin, decodedContentEnd); } void SourceFile::setContents(const String& content) diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp index 687040fa2..105cfac7f 100644 --- a/source/core/slang-char-encode.cpp +++ b/source/core/slang-char-encode.cpp @@ -92,17 +92,17 @@ public: Index index = 0; while (index < length) { - const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte + auto readByte = [&]() -> Byte { - if (index < length) - return bytes[index++]; - else - return Byte(0); - }); + return (index < length) ? bytes[index++] : Byte(0); + }; + const Char32 codePoint = m_reverseOrder ? + getUnicodePointFromUTF16Reversed(readByte) : + getUnicodePointFromUTF16(readByte); char buf[5]; int count = encodeUnicodePointToUTF8(codePoint, buf); - ioBuffer.addRange((const char*)buf, count); + ioBuffer.addRange((const char*)buf, count); } } @@ -134,11 +134,10 @@ private: outOffset = 2; return CharEncodeType::UTF16Reversed; } - } - else - { - // If we don't have a 'mark' byte then we are bit stumped. We'll look for a null bytes and assume they mean we have a 16 bit encoding - for (size_t i = 0; i < bytesCount; i += 2) + + // If we don't have a 'mark' byte then we are bit stumped. We'll look for + // null (non-terminator) bytes and assume they mean we have a 16-bit encoding + for(size_t i = 0; i < (bytesCount-1); i += 2) { #if SLANG_LITTLE_ENDIAN const auto low = bytes[i]; @@ -146,7 +145,7 @@ private: #else const auto low = bytes[i + 1]; const auto high = bytes[i]; -#endif +#endif if ((low == 0) ^ (high == 0)) { outOffset = 2; |
