summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- source/compiler-core/slang-json-lexer.cpp | 2
-rw-r--r-- source/compiler-core/slang-source-loc.cpp | 31
-rw-r--r-- source/core/slang-char-encode.cpp | 25
-rw-r--r-- tests/bugs/implicit-cast.slang.expected.txt | 5
-rw-r--r-- tests/preprocessor/utf16_be_bom_crlf.slang | bin 0 -> 70 bytes
-rw-r--r-- tests/preprocessor/utf16_le_bom_crlf.slang | bin 0 -> 70 bytes
-rw-r--r-- tests/preprocessor/utf8_bom_crlf.slang | 4
7 files changed, 46 insertions, 21 deletions
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp
index 0476ca37a..a335403e1 100644
--- a/source/compiler-core/slang-json-lexer.cpp
+++ b/source/compiler-core/slang-json-lexer.cpp
@@ -238,7 +238,7 @@ JSONTokenType JSONLexer::advance()
StringBuilder buf;
if (c <= ' ' || c >= 0x7e)
{
- static const char s_hex[] = "012345679abcdef";
+ static const char s_hex[] = "0123456789abcdef";
char hexBuf[5] = "0x";
diff --git a/source/compiler-core/slang-source-loc.cpp b/source/compiler-core/slang-source-loc.cpp
index 872c40f0d..75601b815 100644
--- a/source/compiler-core/slang-source-loc.cpp
+++ b/source/compiler-core/slang-source-loc.cpp
@@ -573,15 +573,34 @@ int SourceFile::calcColumnIndex(int lineIndex, int offset, int tabSize)
void SourceFile::setContents(ISlangBlob* blob)
{
- const UInt contentSize = blob->getBufferSize();
+ const UInt rawContentSize = blob->getBufferSize();
- SLANG_ASSERT(contentSize == m_contentSize);
+ SLANG_ASSERT(rawContentSize == m_contentSize);
- char const* contentBegin = (char const*)blob->getBufferPointer();
- char const* contentEnd = contentBegin + contentSize;
+ Byte* rawContentBegin = (Byte*)blob->getBufferPointer();
- m_contentBlob = blob;
- m_content = UnownedStringSlice(contentBegin, contentEnd);
+ // Query the encoding type and discard the Unicode Byte-Order-Marker before decoding
+ size_t offset;
+ auto type = CharEncoding::determineEncoding(
+ rawContentBegin,
+ rawContentSize,
+ offset);
+ SLANG_ASSERT(rawContentSize >= offset);
+
+ List<char> decodedBuffer;
+ CharEncoding::getEncoding(type)->decode(
+ rawContentBegin + offset,
+ int(rawContentSize - offset),
+ decodedBuffer);
+
+ m_contentBlob = RawBlob::create(decodedBuffer.getBuffer(), decodedBuffer.getCount());
+
+ char const* decodedContentBegin = (char const*)m_contentBlob->getBufferPointer();
+ const UInt decodedContentSize = m_contentBlob->getBufferSize();
+ assert(decodedContentSize <= rawContentSize);
+ char const* decodedContentEnd = decodedContentBegin + decodedContentSize;
+
+ m_content = UnownedStringSlice(decodedContentBegin, decodedContentEnd);
}
void SourceFile::setContents(const String& content)
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp
index 687040fa2..105cfac7f 100644
--- a/source/core/slang-char-encode.cpp
+++ b/source/core/slang-char-encode.cpp
@@ -92,17 +92,17 @@ public:
Index index = 0;
while (index < length)
{
- const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte
+ auto readByte = [&]() -> Byte
{
- if (index < length)
- return bytes[index++];
- else
- return Byte(0);
- });
+ return (index < length) ? bytes[index++] : Byte(0);
+ };
+ const Char32 codePoint = m_reverseOrder ?
+ getUnicodePointFromUTF16Reversed(readByte) :
+ getUnicodePointFromUTF16(readByte);
char buf[5];
int count = encodeUnicodePointToUTF8(codePoint, buf);
- ioBuffer.addRange((const char*)buf, count);
+ ioBuffer.addRange((const char*)buf, count);
}
}
@@ -134,11 +134,10 @@ private:
outOffset = 2;
return CharEncodeType::UTF16Reversed;
}
- }
- else
- {
- // If we don't have a 'mark' byte then we are bit stumped. We'll look for a null bytes and assume they mean we have a 16 bit encoding
- for (size_t i = 0; i < bytesCount; i += 2)
+
+ // If we don't have a 'mark' byte then we are bit stumped. We'll look for
+ // null (non-terminator) bytes and assume they mean we have a 16-bit encoding
+ for(size_t i = 0; i < (bytesCount-1); i += 2)
{
#if SLANG_LITTLE_ENDIAN
const auto low = bytes[i];
@@ -146,7 +145,7 @@ private:
#else
const auto low = bytes[i + 1];
const auto high = bytes[i];
-#endif
+#endif
if ((low == 0) ^ (high == 0))
{
outOffset = 2;
diff --git a/tests/bugs/implicit-cast.slang.expected.txt b/tests/bugs/implicit-cast.slang.expected.txt
index d8263ee98..5a4d7b6ab 100644
--- a/tests/bugs/implicit-cast.slang.expected.txt
+++ b/tests/bugs/implicit-cast.slang.expected.txt
@@ -1 +1,4 @@
-2
\ No newline at end of file
+0
+1
+0
+0
diff --git a/tests/preprocessor/utf16_be_bom_crlf.slang b/tests/preprocessor/utf16_be_bom_crlf.slang
new file mode 100644
index 000000000..eba197eff
--- /dev/null
+++ b/tests/preprocessor/utf16_be_bom_crlf.slang
Binary files differ
diff --git a/tests/preprocessor/utf16_le_bom_crlf.slang b/tests/preprocessor/utf16_le_bom_crlf.slang
new file mode 100644
index 000000000..e0005e1e7
--- /dev/null
+++ b/tests/preprocessor/utf16_le_bom_crlf.slang
Binary files differ
diff --git a/tests/preprocessor/utf8_bom_crlf.slang b/tests/preprocessor/utf8_bom_crlf.slang
new file mode 100644
index 000000000..bc8a6b12b
--- /dev/null
+++ b/tests/preprocessor/utf8_bom_crlf.slang
@@ -0,0 +1,4 @@
+void main()
+{
+}
+//TEST:SIMPLE: