diff options
| -rw-r--r-- | source/compiler-core/slang-json-lexer.cpp | 2 | ||||
| -rw-r--r-- | source/compiler-core/slang-source-loc.cpp | 31 | ||||
| -rw-r--r-- | source/core/slang-char-encode.cpp | 25 | ||||
| -rw-r--r-- | tests/bugs/implicit-cast.slang.expected.txt | 5 | ||||
| -rw-r--r-- | tests/preprocessor/utf16_be_bom_crlf.slang | bin | 0 -> 70 bytes | |||
| -rw-r--r-- | tests/preprocessor/utf16_le_bom_crlf.slang | bin | 0 -> 70 bytes | |||
| -rw-r--r-- | tests/preprocessor/utf8_bom_crlf.slang | 4 |
7 files changed, 46 insertions, 21 deletions
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp index 0476ca37a..a335403e1 100644 --- a/source/compiler-core/slang-json-lexer.cpp +++ b/source/compiler-core/slang-json-lexer.cpp @@ -238,7 +238,7 @@ JSONTokenType JSONLexer::advance() StringBuilder buf; if (c <= ' ' || c >= 0x7e) { - static const char s_hex[] = "012345679abcdef"; + static const char s_hex[] = "0123456789abcdef"; char hexBuf[5] = "0x"; diff --git a/source/compiler-core/slang-source-loc.cpp b/source/compiler-core/slang-source-loc.cpp index 872c40f0d..75601b815 100644 --- a/source/compiler-core/slang-source-loc.cpp +++ b/source/compiler-core/slang-source-loc.cpp @@ -573,15 +573,34 @@ int SourceFile::calcColumnIndex(int lineIndex, int offset, int tabSize) void SourceFile::setContents(ISlangBlob* blob) { - const UInt contentSize = blob->getBufferSize(); + const UInt rawContentSize = blob->getBufferSize(); - SLANG_ASSERT(contentSize == m_contentSize); + SLANG_ASSERT(rawContentSize == m_contentSize); - char const* contentBegin = (char const*)blob->getBufferPointer(); - char const* contentEnd = contentBegin + contentSize; + Byte* rawContentBegin = (Byte*)blob->getBufferPointer(); - m_contentBlob = blob; - m_content = UnownedStringSlice(contentBegin, contentEnd); + // Query the encoding type and discard the Unicode Byte-Order-Marker before decoding + size_t offset; + auto type = CharEncoding::determineEncoding( + rawContentBegin, + rawContentSize, + offset); + SLANG_ASSERT(rawContentSize >= offset); + + List<char> decodedBuffer; + CharEncoding::getEncoding(type)->decode( + rawContentBegin + offset, + int(rawContentSize - offset), + decodedBuffer); + + m_contentBlob = RawBlob::create(decodedBuffer.getBuffer(), decodedBuffer.getCount()); + + char const* decodedContentBegin = (char const*)m_contentBlob->getBufferPointer(); + const UInt decodedContentSize = m_contentBlob->getBufferSize(); + assert(decodedContentSize <= rawContentSize); + char const* decodedContentEnd = decodedContentBegin + decodedContentSize; + + m_content = UnownedStringSlice(decodedContentBegin, decodedContentEnd); } void SourceFile::setContents(const String& content) diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp index 687040fa2..105cfac7f 100644 --- a/source/core/slang-char-encode.cpp +++ b/source/core/slang-char-encode.cpp @@ -92,17 +92,17 @@ public: Index index = 0; while (index < length) { - const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte + auto readByte = [&]() -> Byte { - if (index < length) - return bytes[index++]; - else - return Byte(0); - }); + return (index < length) ? bytes[index++] : Byte(0); + }; + const Char32 codePoint = m_reverseOrder ? + getUnicodePointFromUTF16Reversed(readByte) : + getUnicodePointFromUTF16(readByte); char buf[5]; int count = encodeUnicodePointToUTF8(codePoint, buf); - ioBuffer.addRange((const char*)buf, count); + ioBuffer.addRange((const char*)buf, count); } } @@ -134,11 +134,10 @@ private: outOffset = 2; return CharEncodeType::UTF16Reversed; } - } - else - { - // If we don't have a 'mark' byte then we are bit stumped. We'll look for a null bytes and assume they mean we have a 16 bit encoding - for (size_t i = 0; i < bytesCount; i += 2) + + // If we don't have a 'mark' byte then we are bit stumped. We'll look for + // null (non-terminator) bytes and assume they mean we have a 16-bit encoding + for(size_t i = 0; i < (bytesCount-1); i += 2) { #if SLANG_LITTLE_ENDIAN const auto low = bytes[i]; @@ -146,7 +145,7 @@ private: #else const auto low = bytes[i + 1]; const auto high = bytes[i]; -#endif +#endif if ((low == 0) ^ (high == 0)) { outOffset = 2; diff --git a/tests/bugs/implicit-cast.slang.expected.txt b/tests/bugs/implicit-cast.slang.expected.txt index d8263ee98..5a4d7b6ab 100644 --- a/tests/bugs/implicit-cast.slang.expected.txt +++ b/tests/bugs/implicit-cast.slang.expected.txt @@ -1 +1,4 @@ -2
\ No newline at end of file +0 +1 +0 +0 diff --git a/tests/preprocessor/utf16_be_bom_crlf.slang b/tests/preprocessor/utf16_be_bom_crlf.slang Binary files differnew file mode 100644 index 000000000..eba197eff --- /dev/null +++ b/tests/preprocessor/utf16_be_bom_crlf.slang diff --git a/tests/preprocessor/utf16_le_bom_crlf.slang b/tests/preprocessor/utf16_le_bom_crlf.slang Binary files differnew file mode 100644 index 000000000..e0005e1e7 --- /dev/null +++ b/tests/preprocessor/utf16_le_bom_crlf.slang diff --git a/tests/preprocessor/utf8_bom_crlf.slang b/tests/preprocessor/utf8_bom_crlf.slang new file mode 100644 index 000000000..bc8a6b12b --- /dev/null +++ b/tests/preprocessor/utf8_bom_crlf.slang @@ -0,0 +1,4 @@ +void main()
+{
+}
+//TEST:SIMPLE: |
