7 files changed, 46 insertions, 21 deletions
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp
index 0476ca37a..a335403e1 100644
--- a/source/compiler-core/slang-json-lexer.cpp
+++ b/source/compiler-core/slang-json-lexer.cpp
@@ -238,7 +238,7 @@ JSONTokenType JSONLexer::advance()
                 StringBuilder buf;
                 if (c <= ' ' || c >= 0x7e)
                 {
-                    static const char s_hex[] = "012345679abcdef";
+                    static const char s_hex[] = "0123456789abcdef";
 
                     char hexBuf[5] = "0x";
       
diff --git a/source/compiler-core/slang-source-loc.cpp b/source/compiler-core/slang-source-loc.cpp
index 872c40f0d..75601b815 100644
--- a/source/compiler-core/slang-source-loc.cpp
+++ b/source/compiler-core/slang-source-loc.cpp
@@ -573,15 +573,34 @@ int SourceFile::calcColumnIndex(int lineIndex, int offset, int tabSize)
 
 void SourceFile::setContents(ISlangBlob* blob)
 {
-    const UInt contentSize = blob->getBufferSize();
+    const UInt rawContentSize = blob->getBufferSize();
 
-    SLANG_ASSERT(contentSize == m_contentSize);
+    SLANG_ASSERT(rawContentSize == m_contentSize);
 
-    char const* contentBegin = (char const*)blob->getBufferPointer();
-    char const* contentEnd = contentBegin + contentSize;
+    Byte* rawContentBegin = (Byte*)blob->getBufferPointer();
 
-    m_contentBlob = blob;
-    m_content = UnownedStringSlice(contentBegin, contentEnd);
+    // Detect the encoding; `offset` receives the byte-order mark (BOM) length to skip before decoding
+    size_t offset = 0;
+    auto type = CharEncoding::determineEncoding(
+        rawContentBegin,
+        rawContentSize,
+        offset);
+    SLANG_ASSERT(rawContentSize >= offset);
+
+    List<char> decodedBuffer;
+    CharEncoding::getEncoding(type)->decode(
+        rawContentBegin + offset,
+        int(rawContentSize - offset),
+        decodedBuffer);
+
+    m_contentBlob = RawBlob::create(decodedBuffer.getBuffer(), decodedBuffer.getCount());
+
+    char const* decodedContentBegin = (char const*)m_contentBlob->getBufferPointer();
+    const UInt decodedContentSize = m_contentBlob->getBufferSize();
+    // No size bound is asserted: UTF-16 -> UTF-8 can grow the data (code points >= U+0800 go from 2 to 3 bytes)
+    char const* decodedContentEnd = decodedContentBegin + decodedContentSize;
+
+    m_content = UnownedStringSlice(decodedContentBegin, decodedContentEnd);
 }
 
 void SourceFile::setContents(const String& content)
diff --git a/source/core/slang-char-encode.cpp b/source/core/slang-char-encode.cpp
index 687040fa2..105cfac7f 100644
--- a/source/core/slang-char-encode.cpp
+++ b/source/core/slang-char-encode.cpp
@@ -92,17 +92,17 @@ public:
 		Index index = 0;
 		while (index < length)
 		{
-			const Char32 codePoint = getUnicodePointFromUTF16([&]() -> Byte
+			auto readByte = [&]() -> Byte
 			{
-                if (index < length)
-                    return bytes[index++];
-                else
-                    return Byte(0);
-			});
+				return (index < length) ? bytes[index++] : Byte(0);
+			};
+			const Char32 codePoint = m_reverseOrder ?
+				getUnicodePointFromUTF16Reversed(readByte) :
+				getUnicodePointFromUTF16(readByte);
 
 			char buf[5];
 			int count = encodeUnicodePointToUTF8(codePoint, buf);
-            ioBuffer.addRange((const char*)buf, count);
+			ioBuffer.addRange((const char*)buf, count);
 		}
 	}
 
@@ -134,11 +134,10 @@ private:
             outOffset = 2;
             return CharEncodeType::UTF16Reversed;
         }
-    }
-    else
-    {
-        // If we don't have a 'mark' byte then we are bit stumped. We'll look for a null bytes and assume they mean we have a 16 bit encoding
-        for (size_t i = 0; i < bytesCount; i += 2)
+
+        // If we don't have a 'mark' byte then we are a bit stumped. We'll look for
+        // null (non-terminator) bytes and assume they mean we have a 16-bit encoding.
+        for(size_t i = 0; i < (bytesCount-1); i += 2)
         {
 #if SLANG_LITTLE_ENDIAN
             const auto low = bytes[i];
@@ -146,7 +145,7 @@ private:
 #else
             const auto low = bytes[i + 1];
             const auto high = bytes[i];
-#endif 
+#endif
             if ((low == 0) ^ (high == 0))
             {
                 outOffset = 2;
diff --git a/tests/bugs/implicit-cast.slang.expected.txt b/tests/bugs/implicit-cast.slang.expected.txt
index d8263ee98..5a4d7b6ab 100644
--- a/tests/bugs/implicit-cast.slang.expected.txt
+++ b/tests/bugs/implicit-cast.slang.expected.txt
@@ -1 +1,4 @@
-2
-\ No newline at end of file
+0
+1
+0
+0
diff --git a/tests/preprocessor/utf16_be_bom_crlf.slang b/tests/preprocessor/utf16_be_bom_crlf.slang
new file mode 100644
index 000000000..eba197eff
--- /dev/null
+++ b/tests/preprocessor/utf16_be_bom_crlf.slang
diff --git a/tests/preprocessor/utf16_le_bom_crlf.slang b/tests/preprocessor/utf16_le_bom_crlf.slang
new file mode 100644
index 000000000..e0005e1e7
--- /dev/null
+++ b/tests/preprocessor/utf16_le_bom_crlf.slang
diff --git a/tests/preprocessor/utf8_bom_crlf.slang b/tests/preprocessor/utf8_bom_crlf.slang
new file mode 100644
index 000000000..bc8a6b12b
--- /dev/null
+++ b/tests/preprocessor/utf8_bom_crlf.slang
@@ -0,0 +1,4 @@
+void main()
+{
+}
+//TEST:SIMPLE: