summary | refs | log | tree | commit | diff | stats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r-- source/compiler-core/slang-json-lexer.cpp 2
-rw-r--r-- source/compiler-core/slang-source-loc.cpp 31
-rw-r--r-- source/core/slang-char-encode.cpp 25
3 files changed, 38 insertions, 20 deletions
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp
index 0476ca37a..a335403e1 100644
--- a/source/compiler-core/slang-json-lexer.cpp
+++ b/source/compiler-core/slang-json-lexer.cpp
@@ -238,7 +238,7 @@ JSONTokenType JSONLexer::advance()
StringBuilder buf;
if (c <= ' ' || c >= 0x7e)
{
- static const char s_hex[] = "012345679abcdef";
+ static const char s_hex[] = "0123456789abcdef";
char hexBuf[5] = "0x";
diff --git a/source/compiler-core/slang-source-loc.cpp b/source/compiler-core/slang-source-loc.cpp
index 872c40f0d..75601b815 100644
--- a/source/compiler-core/slang-source-loc.cpp
+++ b/source/compiler-core/slang-source-loc.cpp
@@ -573,15 +573,34 @@ int SourceFile::calcColumnIndex(int lineIndex, int offset, int tabSize)
void SourceFile::setContents(ISlangBlob* blob)
{
- const UInt contentSize = blob->getBufferSize();
+ const UInt rawContentSize = blob->getBufferSize();
- SLANG_ASSERT(contentSize == m_contentSize);
+ SLANG_ASSERT(rawContentSize == m_contentSize);
- char const* contentBegin = (char const*)blob->getBufferPointer();
- char const* contentEnd = contentBegin + contentSize;
+ Byte* rawContentBegin = (Byte*)blob->getBufferPointer();
- m_contentBlob = blob;
- m_content = UnownedStringSlice(contentBegin, contentEnd);
+ // Query the encoding type and discard the Unicode Byte-Order-Marker before decoding
+ size_t offset;
+ auto type = CharEncoding::determineEncoding(
+ rawContentBegin,
+ rawContentSize,
+ offset);
+ SLANG_ASSERT(rawContentSize >= offset);
+
+ List<char> decodedBuffer;
+ CharEncoding::getEncoding(type)->decode(
+ rawContentBegin + offset,
+ int(rawContentSize - offset),
+ decodedBuffer);
+
+ m_contentBlob = RawBlob::create(decodedBuffer.getBuffer(), decodedBuffer.getCount());
+
+ char const* decodedContentBegin = (char const*)m_contentBlob->getBufferPointer();
+ const UInt decodedContentSize = m_contentBlob->getBufferSize();
+ assert(decodedContentSize <= rawContentSize);
+ char const* decodedContentEnd = decodedContentBegin + decodedContentSize;
+
+ m_content = UnownedStringSlice(decodedContentBegin, decodedContentEnd);
}
void SourceFile::setContents(const String& content)
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp
index 687040fa2..105cfac7f 100644
--- a/source/core/slang-char-encode.cpp
+++ b/source/core/slang-char-encode.cpp
@@ -92,17 +92,17 @@ public:
Index index = 0;
while (index < length)
{
- const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte
+ auto readByte = [&]() -> Byte
{
- if (index < length)
- return bytes[index++];
- else
- return Byte(0);
- });
+ return (index < length) ? bytes[index++] : Byte(0);
+ };
+ const Char32 codePoint = m_reverseOrder ?
+ getUnicodePointFromUTF16Reversed(readByte) :
+ getUnicodePointFromUTF16(readByte);
char buf[5];
int count = encodeUnicodePointToUTF8(codePoint, buf);
- ioBuffer.addRange((const char*)buf, count);
+ ioBuffer.addRange((const char*)buf, count);
}
}
@@ -134,11 +134,10 @@ private:
outOffset = 2;
return CharEncodeType::UTF16Reversed;
}
- }
- else
- {
- // If we don't have a 'mark' byte then we are bit stumped. We'll look for a null bytes and assume they mean we have a 16 bit encoding
- for (size_t i = 0; i < bytesCount; i += 2)
+
+ // If we don't have a 'mark' byte then we are bit stumped. We'll look for
+ // null (non-terminator) bytes and assume they mean we have a 16-bit encoding
+ for(size_t i = 0; i < (bytesCount-1); i += 2)
{
#if SLANG_LITTLE_ENDIAN
const auto low = bytes[i];
@@ -146,7 +145,7 @@ private:
#else
const auto low = bytes[i + 1];
const auto high = bytes[i];
-#endif
+#endif
if ((low == 0) ^ (high == 0))
{
outOffset = 2;