summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-05-25 20:58:43 -0400
committerGitHub <noreply@github.com>2021-05-25 20:58:43 -0400
commit7d1b8ac13faf80ed56b37243480d097059da5aab (patch)
tree6613b13983083d16b8945c6d92b1f4f1d1fb2501
parent89f67d9c626fa193dba4adafcb54e46b13aa5e98 (diff)
JSON Lexing and string encoding/decoding (#1858)
* #include an absolute path didn't work - because paths were taken to always be relative. * WIP Json lexer. * Check JSON Lex with unit test * Add JSON escaping/unescaping of strings. * Big fix encoding/decoding. * Fix typo in JSON diagnostics. * Fix typo. * Better float testing.
-rw-r--r--build/visual-studio/compiler-core/compiler-core.vcxproj5
-rw-r--r--build/visual-studio/compiler-core/compiler-core.vcxproj.filters15
-rw-r--r--build/visual-studio/slang-test/slang-test.vcxproj1
-rw-r--r--build/visual-studio/slang-test/slang-test.vcxproj.filters3
-rw-r--r--source/compiler-core/slang-json-diagnostic-defs.h37
-rw-r--r--source/compiler-core/slang-json-diagnostics.cpp33
-rw-r--r--source/compiler-core/slang-json-diagnostics.h26
-rw-r--r--source/compiler-core/slang-json-lexer.cpp385
-rw-r--r--source/compiler-core/slang-json-lexer.h89
-rw-r--r--source/core/slang-string-escape-util.cpp373
-rw-r--r--source/core/slang-string-escape-util.h1
-rw-r--r--tools/slang-test/unit-test-json.cpp180
12 files changed, 1148 insertions, 0 deletions
diff --git a/build/visual-studio/compiler-core/compiler-core.vcxproj b/build/visual-studio/compiler-core/compiler-core.vcxproj
index e6883900c..7d2ac6afa 100644
--- a/build/visual-studio/compiler-core/compiler-core.vcxproj
+++ b/build/visual-studio/compiler-core/compiler-core.vcxproj
@@ -179,6 +179,9 @@
<ClInclude Include="..\..\..\source\compiler-core\slang-gcc-compiler-util.h" />
<ClInclude Include="..\..\..\source\compiler-core\slang-glslang-compiler.h" />
<ClInclude Include="..\..\..\source\compiler-core\slang-include-system.h" />
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-diagnostic-defs.h" />
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-diagnostics.h" />
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-lexer.h" />
<ClInclude Include="..\..\..\source\compiler-core\slang-lexer-diagnostic-defs.h" />
<ClInclude Include="..\..\..\source\compiler-core\slang-lexer.h" />
<ClInclude Include="..\..\..\source\compiler-core\slang-misc-diagnostic-defs.h" />
@@ -201,6 +204,8 @@
<ClCompile Include="..\..\..\source\compiler-core\slang-gcc-compiler-util.cpp" />
<ClCompile Include="..\..\..\source\compiler-core\slang-glslang-compiler.cpp" />
<ClCompile Include="..\..\..\source\compiler-core\slang-include-system.cpp" />
+ <ClCompile Include="..\..\..\source\compiler-core\slang-json-diagnostics.cpp" />
+ <ClCompile Include="..\..\..\source\compiler-core\slang-json-lexer.cpp" />
<ClCompile Include="..\..\..\source\compiler-core\slang-lexer.cpp" />
<ClCompile Include="..\..\..\source\compiler-core\slang-name-convention-util.cpp" />
<ClCompile Include="..\..\..\source\compiler-core\slang-name.cpp" />
diff --git a/build/visual-studio/compiler-core/compiler-core.vcxproj.filters b/build/visual-studio/compiler-core/compiler-core.vcxproj.filters
index d35f9941c..243eecd5b 100644
--- a/build/visual-studio/compiler-core/compiler-core.vcxproj.filters
+++ b/build/visual-studio/compiler-core/compiler-core.vcxproj.filters
@@ -36,6 +36,15 @@
<ClInclude Include="..\..\..\source\compiler-core\slang-include-system.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-diagnostic-defs.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-diagnostics.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\..\source\compiler-core\slang-json-lexer.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="..\..\..\source\compiler-core\slang-lexer-diagnostic-defs.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -98,6 +107,12 @@
<ClCompile Include="..\..\..\source\compiler-core\slang-include-system.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\source\compiler-core\slang-json-diagnostics.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\source\compiler-core\slang-json-lexer.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\source\compiler-core\slang-lexer.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/build/visual-studio/slang-test/slang-test.vcxproj b/build/visual-studio/slang-test/slang-test.vcxproj
index 24d2f2ac1..22a8aae5f 100644
--- a/build/visual-studio/slang-test/slang-test.vcxproj
+++ b/build/visual-studio/slang-test/slang-test.vcxproj
@@ -183,6 +183,7 @@
<ClCompile Include="..\..\..\tools\slang-test\unit-test-compression.cpp" />
<ClCompile Include="..\..\..\tools\slang-test\unit-test-find-type-by-name.cpp" />
<ClCompile Include="..\..\..\tools\slang-test\unit-test-free-list.cpp" />
+ <ClCompile Include="..\..\..\tools\slang-test\unit-test-json.cpp" />
<ClCompile Include="..\..\..\tools\slang-test\unit-test-memory-arena.cpp" />
<ClCompile Include="..\..\..\tools\slang-test\unit-test-path.cpp" />
<ClCompile Include="..\..\..\tools\slang-test\unit-test-riff.cpp" />
diff --git a/build/visual-studio/slang-test/slang-test.vcxproj.filters b/build/visual-studio/slang-test/slang-test.vcxproj.filters
index 9b7138eef..1e5b6e4af 100644
--- a/build/visual-studio/slang-test/slang-test.vcxproj.filters
+++ b/build/visual-studio/slang-test/slang-test.vcxproj.filters
@@ -68,6 +68,9 @@
<ClCompile Include="..\..\..\tools\slang-test\unit-test-free-list.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\..\tools\slang-test\unit-test-json.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\..\..\tools\slang-test\unit-test-memory-arena.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/compiler-core/slang-json-diagnostic-defs.h b/source/compiler-core/slang-json-diagnostic-defs.h
new file mode 100644
index 000000000..a4b260857
--- /dev/null
+++ b/source/compiler-core/slang-json-diagnostic-defs.h
@@ -0,0 +1,37 @@
+//
+
+// The file is meant to be included multiple times, to produce different
+// pieces of declaration/definition code related to diagnostic messages
+//
+// Each diagnostic is declared here with:
+//
+// DIAGNOSTIC(id, severity, name, messageFormat)
+//
+// Where `id` is the unique diagnostic ID, `severity` is the default
+// severity (from the `Severity` enum), `name` is a name used to refer
+// to this diagnostic from code, and `messageFormat` is the default
+// (non-localized) message for the diagnostic, with placeholders
+// for any arguments.
+
+#ifndef DIAGNOSTIC
+#error Need to #define DIAGNOSTIC(...) before including
+#define DIAGNOSTIC(id, severity, name, messageFormat) /* */
+#endif
+
+//
+// -1 - Notes that decorate another diagnostic.
+//
+
+//
+// 2xxxx - JSON Lexical analysis
+//
+
+// Diagnostics reported by the JSON lexer.
+// NOTE: `unexpectedCharacter` has a `$0` placeholder, so it needs an argument when reported.
+DIAGNOSTIC(20000, Error, unexpectedCharacter, "unexpected character '$0'")
+DIAGNOSTIC(20001, Error, endOfFileInLiteral, "end of file in literal")
+DIAGNOSTIC(20002, Error, newlineInLiteral, "newline in literal")
+DIAGNOSTIC(20003, Error, endOfFileInComment, "end of file in comment")
+DIAGNOSTIC(20004, Error, expectingAHexDigit, "expecting a hex digit")
+DIAGNOSTIC(20005, Error, expectingADigit, "expecting a digit")
+DIAGNOSTIC(20006, Error, expectingValueName, "expecting value name [null, true, false]")
+
+#undef DIAGNOSTIC
diff --git a/source/compiler-core/slang-json-diagnostics.cpp b/source/compiler-core/slang-json-diagnostics.cpp
new file mode 100644
index 000000000..1d35e8faf
--- /dev/null
+++ b/source/compiler-core/slang-json-diagnostics.cpp
@@ -0,0 +1,33 @@
+// slang-json-diagnostics.cpp
+#include "slang-json-diagnostics.h"
+
+namespace Slang {
+
+// Definitions of the JSON diagnostics.
+// Each DIAGNOSTIC(...) entry in the defs file expands to one `const DiagnosticInfo` named after the entry.
+namespace JSONDiagnostics
+{
+#define DIAGNOSTIC(id, severity, name, messageFormat) const DiagnosticInfo name = { id, Severity::severity, #name, messageFormat };
+#include "slang-json-diagnostic-defs.h"
+#undef DIAGNOSTIC
+}
+
+// Table of pointers to every JSON diagnostic, in the order they appear in the defs file.
+// Built by including the defs file again with DIAGNOSTIC expanding to `&JSONDiagnostics::name,`.
+static const DiagnosticInfo* const kJSONDiagnostics[] =
+{
+#define DIAGNOSTIC(id, severity, name, messageFormat) &JSONDiagnostics::name,
+#include "slang-json-diagnostic-defs.h"
+#undef DIAGNOSTIC
+};
+
+/// Allocates a new DiagnosticsLookup and adds all of the JSON diagnostics to it.
+/// The caller takes over the returned pointer.
+static DiagnosticsLookup* _newJSONDiagnosticsLookup()
+{
+    DiagnosticsLookup* const jsonLookup = new DiagnosticsLookup;
+    jsonLookup->add(kJSONDiagnostics, SLANG_COUNT_OF(kJSONDiagnostics));
+    return jsonLookup;
+}
+
+/// Returns the lookup holding the JSON diagnostics.
+/// The lookup is created on the first call and kept in a function-local static for later calls.
+DiagnosticsLookup* getJSONDiagnosticsLookup()
+{
+    static RefPtr<DiagnosticsLookup> s_jsonLookup = _newJSONDiagnosticsLookup();
+    return s_jsonLookup;
+}
+
+} // namespace Slang
diff --git a/source/compiler-core/slang-json-diagnostics.h b/source/compiler-core/slang-json-diagnostics.h
new file mode 100644
index 000000000..88ec0c550
--- /dev/null
+++ b/source/compiler-core/slang-json-diagnostics.h
@@ -0,0 +1,26 @@
+#ifndef SLANG_JSON_DIAGNOSTICS_H
+#define SLANG_JSON_DIAGNOSTICS_H
+
+#include "../core/slang-basic.h"
+#include "../core/slang-writer.h"
+
+#include "slang-source-loc.h"
+#include "slang-diagnostic-sink.h"
+#include "slang-token.h"
+
+#include "../../slang.h"
+
+namespace Slang
+{
+
+DiagnosticsLookup* getJSONDiagnosticsLookup();
+
+// Declarations of the JSON diagnostics - one `extern const DiagnosticInfo` per DIAGNOSTIC(...) entry.
+// There is no #undef here because the defs file itself ends with `#undef DIAGNOSTIC`.
+namespace JSONDiagnostics
+{
+#define DIAGNOSTIC(id, severity, name, messageFormat) extern const DiagnosticInfo name;
+#include "slang-json-diagnostic-defs.h"
+}
+
+}
+
+#endif
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp
new file mode 100644
index 000000000..19a5b29a7
--- /dev/null
+++ b/source/compiler-core/slang-json-lexer.cpp
@@ -0,0 +1,385 @@
+// slang-json-lexer.cpp
+#include "slang-json-lexer.h"
+
+#include "slang-json-diagnostics.h"
+#include "../core/slang-char-util.h"
+
+/*
+https://www.json.org/json-en.html
+*/
+
+namespace Slang {
+
+/// Binds the lexer to `sourceView`, sends diagnostics to `sink`, and lexes the first token.
+/// Always returns SLANG_OK.
+SlangResult JSONLexer::init(SourceView* sourceView, DiagnosticSink* sink)
+{
+    m_sink = sink;
+    m_sourceView = sourceView;
+
+    SourceFile* const file = sourceView->getSourceFile();
+
+    // The content must be null terminated - the lexing routines stop on a 0 byte
+    SLANG_ASSERT(file && file->hasContent());
+
+    // A location is m_startLoc plus the byte offset from m_contentStart
+    m_startLoc = sourceView->getRange().begin;
+    m_contentStart = file->getContent().begin();
+
+    // Nothing has been consumed yet
+    m_cursor = m_contentStart;
+    m_lexemeStart = m_contentStart;
+
+    // Prime the first token, so peekToken()/peekType() can be used straight away
+    advance();
+
+    return SLANG_OK;
+}
+
+/// `c` is the end of line character just consumed, `cursor` points at the character following it.
+/// Returns `cursor` moved on by one if the two characters form "\r\n" or "\n\r", else `cursor` unchanged.
+SLANG_FORCE_INLINE static const char* _handleEndOfLine(char c, const char* cursor)
+{
+    SLANG_ASSERT(c == '\n' || c == '\r');
+    const char next = *cursor;
+    // c is \n or \r, so the xor only matches when `next` is the *other* one of the two
+    const bool isTwoCharEndOfLine = ((c ^ next) == ('\n' ^ '\r'));
+    return cursor + Index(isTwoCharEndOfLine);
+}
+
+/// Makes the current token a zero length Invalid token located at the start of the current lexeme.
+JSONTokenType JSONLexer::_setInvalidToken()
+{
+    const char* const tokenEnd = m_lexemeStart;
+    return _setToken(JSONTokenType::Invalid, tokenEnd);
+}
+
+/// Lexes the next token, makes it the current token and returns its type.
+///
+/// Whitespace and `//` and `/* */` comments are skipped. On a lexical error a diagnostic is
+/// reported to the sink and the current token becomes a zero length Invalid token at the start
+/// of the offending lexeme. Once the terminating 0 is reached every call returns EndOfFile.
+JSONTokenType JSONLexer::advance()
+{
+    // Reports `unexpectedCharacter` for `ch` at the current lexeme and sets the invalid token.
+    // Characters that aren't printable ASCII are shown as a two digit hex value.
+    auto reportUnexpected = [this](char ch) -> JSONTokenType
+    {
+        StringBuilder buf;
+        // Go via uint8_t so the result doesn't depend on `char` being signed
+        const uint8_t byte = uint8_t(ch);
+        if (byte <= ' ' || byte >= 0x7e)
+        {
+            static const char s_hex[] = "0123456789abcdef";
+
+            char hexBuf[5] = "0x";
+            hexBuf[2] = s_hex[(byte >> 4) & 0xf];
+            hexBuf[3] = s_hex[byte & 0xf];
+            hexBuf[4] = 0;
+
+            buf << hexBuf;
+        }
+        else
+        {
+            buf << ch;
+        }
+
+        // The message has a $0 placeholder, so the text must be passed along
+        m_sink->diagnose(_getLoc(m_lexemeStart), JSONDiagnostics::unexpectedCharacter, buf);
+        return _setInvalidToken();
+    };
+
+    const char* cursor = m_cursor;
+
+    while (true)
+    {
+        m_lexemeStart = cursor;
+
+        const char c = *cursor++;
+
+        switch (c)
+        {
+            // Stay on the terminator, so advancing again produces EndOfFile again
+            case 0:     return _setToken(JSONTokenType::EndOfFile, cursor - 1);
+            case '"':
+            {
+                cursor = _lexString(cursor);
+                if (cursor == nullptr)
+                {
+                    return _setInvalidToken();
+                }
+                return _setToken(JSONTokenType::StringLiteral, cursor);
+            }
+            case '/':
+            {
+                // We allow comments. Look at the character following the '/' just consumed.
+                const char nextChar = *cursor;
+
+                if (nextChar == '/')
+                {
+                    // Line comment
+                    cursor = _lexLineComment(cursor + 1);
+                    break;
+                }
+                if (nextChar == '*')
+                {
+                    // Skip the opening '*', otherwise it could also be taken as the
+                    // start of the closing "*/" (as in "/*/").
+                    cursor = _lexBlockComment(cursor + 1);
+                    // Can fail...
+                    if (cursor == nullptr)
+                    {
+                        return _setInvalidToken();
+                    }
+                    break;
+                }
+
+                // A '/' on its own is not part of any JSON token
+                return reportUnexpected(c);
+            }
+            case ' ':
+            case '\t':
+            case '\n':
+            case '\r':
+            {
+                cursor = _lexWhitespace(cursor);
+                break;
+            }
+            case ':':   return _setToken(JSONTokenType::Colon, cursor);
+            case ',':   return _setToken(JSONTokenType::Comma, cursor);
+            case '[':   return _setToken(JSONTokenType::LBracket, cursor);
+            case ']':   return _setToken(JSONTokenType::RBracket, cursor);
+            case '{':   return _setToken(JSONTokenType::LBrace, cursor);
+            case '}':   return _setToken(JSONTokenType::RBrace, cursor);
+
+            case '-':
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+            {
+                // The number lexer wants to see the first character too
+                LexResult res = _lexNumber(cursor - 1);
+                if (res.cursor == nullptr)
+                {
+                    return _setInvalidToken();
+                }
+                return _setToken(res.type, res.cursor);
+            }
+            case 't':
+            {
+                // The && short circuits, so this never reads past the terminating 0
+                if (cursor[0] == 'r' && cursor[1] == 'u' && cursor[2] == 'e')
+                {
+                    return _setToken(JSONTokenType::True, cursor + 3);
+                }
+                m_sink->diagnose(_getLoc(m_lexemeStart), JSONDiagnostics::expectingValueName);
+                return _setInvalidToken();
+            }
+            case 'f':
+            {
+                if (cursor[0] == 'a' && cursor[1] == 'l' && cursor[2] == 's' && cursor[3] == 'e')
+                {
+                    return _setToken(JSONTokenType::False, cursor + 4);
+                }
+                m_sink->diagnose(_getLoc(m_lexemeStart), JSONDiagnostics::expectingValueName);
+                return _setInvalidToken();
+            }
+            case 'n':
+            {
+                if (cursor[0] == 'u' && cursor[1] == 'l' && cursor[2] == 'l')
+                {
+                    return _setToken(JSONTokenType::Null, cursor + 3);
+                }
+                m_sink->diagnose(_getLoc(m_lexemeStart), JSONDiagnostics::expectingValueName);
+                return _setInvalidToken();
+            }
+            default:
+            {
+                return reportUnexpected(c);
+            }
+        }
+    }
+}
+
+/// Lexes a JSON number starting at `cursor` (which points at the optional '-' or the first digit).
+///
+/// Returns the token type - IntegerLiteral, or FloatLiteral if there is a fraction and/or an
+/// exponent - together with the cursor just past the number. If the number is malformed
+/// `expectingADigit` is reported and the returned cursor is nullptr.
+JSONLexer::LexResult JSONLexer::_lexNumber(const char* cursor)
+{
+    JSONTokenType tokenType = JSONTokenType::IntegerLiteral;
+
+    if (*cursor == '-')
+    {
+        cursor++;
+    }
+
+    if (*cursor == '0')
+    {
+        // Can only be followed by '.', an exponent, or nothing
+        cursor++;
+    }
+    else if (*cursor >= '1' && *cursor <= '9')
+    {
+        cursor++;
+        while (CharUtil::isDigit(*cursor))
+        {
+            cursor++;
+        }
+    }
+    else
+    {
+        // The integer part is not optional - this is something like "-" or "-."
+        m_sink->diagnose(_getLoc(cursor), JSONDiagnostics::expectingADigit);
+        return LexResult{ JSONTokenType::Invalid, nullptr };
+    }
+
+    // There's a fraction
+    if (*cursor == '.')
+    {
+        tokenType = JSONTokenType::FloatLiteral;
+        // Skip the dot
+        cursor++;
+        // Must have at least one digit
+        if (!CharUtil::isDigit(*cursor))
+        {
+            m_sink->diagnose(_getLoc(cursor), JSONDiagnostics::expectingADigit);
+            return LexResult{ JSONTokenType::Invalid, nullptr };
+        }
+        // Skip the digit, and any that follow
+        cursor++;
+        while (CharUtil::isDigit(*cursor))
+        {
+            cursor++;
+        }
+    }
+
+    // There's an exponent
+    if (*cursor == 'e' || *cursor == 'E')
+    {
+        tokenType = JSONTokenType::FloatLiteral;
+
+        // Skip the e/E
+        cursor++;
+
+        // Skip +/- if has one
+        if (*cursor == '+' || *cursor == '-')
+        {
+            cursor++;
+        }
+
+        // Must have at least one digit
+        if (!CharUtil::isDigit(*cursor))
+        {
+            m_sink->diagnose(_getLoc(cursor), JSONDiagnostics::expectingADigit);
+            return LexResult{ JSONTokenType::Invalid, nullptr };
+        }
+
+        // Skip the digit, and any that follow
+        cursor++;
+        while (CharUtil::isDigit(*cursor))
+        {
+            cursor++;
+        }
+    }
+
+    return LexResult{ tokenType, cursor };
+}
+
+/// Lexes the remainder of a string literal. `cursor` points just past the opening quote.
+///
+/// Returns the cursor just past the closing quote. On error a diagnostic is reported and nullptr
+/// is returned. The errors are: hitting the end of the content, a `\u` that isn't followed by
+/// 4 hex digits, and a backslash followed by a character that isn't a JSON escape.
+const char* JSONLexer::_lexString(const char* cursor)
+{
+    // We've skipped the first "
+    while (true)
+    {
+        const char c = *cursor++;
+
+        if (c == '"')
+        {
+            return cursor;
+        }
+        if (c == 0)
+        {
+            m_sink->diagnose(_getLoc(cursor - 1), JSONDiagnostics::endOfFileInLiteral);
+            return nullptr;
+        }
+        if (c != '\\')
+        {
+            // Somewhat surprisingly it appears it's valid to have \r\n inside of quotes,
+            // so every other character is just taken as part of the literal.
+            continue;
+        }
+
+        // It's an escape. `cursor` points at the character after the backslash.
+        const char escapeC = *cursor;
+        switch (escapeC)
+        {
+            case '"':
+            case '\\':
+            case '/':
+            case 'b':
+            case 'f':
+            case 'n':
+            case 'r':
+            case 't':
+            {
+                ++cursor;
+                break;
+            }
+            case 'u':
+            {
+                ++cursor;
+                for (Index i = 0; i < 4; ++i)
+                {
+                    // A terminating 0 isn't a hex digit, so this can't run off the end
+                    if (!CharUtil::isHexDigit(cursor[i]))
+                    {
+                        // Point at the character that is actually wrong
+                        m_sink->diagnose(_getLoc(cursor + i), JSONDiagnostics::expectingAHexDigit);
+                        return nullptr;
+                    }
+                }
+                cursor += 4;
+                break;
+            }
+            case 0:
+            {
+                m_sink->diagnose(_getLoc(cursor), JSONDiagnostics::endOfFileInLiteral);
+                return nullptr;
+            }
+            default:
+            {
+                // Not a JSON escape. Decoding the literal would fail on this, so reject it here.
+                StringBuilder buf;
+                buf << escapeC;
+                m_sink->diagnose(_getLoc(cursor), JSONDiagnostics::unexpectedCharacter, buf);
+                return nullptr;
+            }
+        }
+    }
+}
+
+/// Skips the rest of a line comment starting from `cursor`.
+/// Returns the cursor at the start of the next line, or at the terminating 0 if that comes first.
+const char* JSONLexer::_lexLineComment(const char* cursor)
+{
+    while (true)
+    {
+        const char c = *cursor;
+        if (c == 0)
+        {
+            // Leave the terminator in place for the caller
+            return cursor;
+        }
+
+        ++cursor;
+        if (c == '\n' || c == '\r')
+        {
+            // Step over the second character of a two character end of line
+            return _handleEndOfLine(c, cursor);
+        }
+    }
+}
+
+/// Scans forward from `cursor` for the "*/" that closes a block comment.
+/// Returns the cursor just past the "*/". If the end of the content is reached first,
+/// `endOfFileInComment` is reported at the terminator and nullptr is returned.
+const char* JSONLexer::_lexBlockComment(const char* cursor)
+{
+    for (;;)
+    {
+        const char c = *cursor++;
+        switch (c)
+        {
+            case 0:
+            {
+                // cursor is already one past the terminator, so step back to locate it
+                m_sink->diagnose(_getLoc(cursor - 1), JSONDiagnostics::endOfFileInComment);
+                return nullptr;
+            }
+            case '*':
+            {
+                if (*cursor == '/')
+                {
+                    return cursor + 1;
+                }
+                break;
+            }
+            default: break;
+        }
+    }
+}
+
+/// Returns the first position at or after `cursor` that doesn't hold a space, tab, \n or \r.
+const char* JSONLexer::_lexWhitespace(const char* cursor)
+{
+    // Might want to use CharUtil::isWhitespace...
+    for (;; ++cursor)
+    {
+        const char c = *cursor;
+        const bool isWhiteSpace = (c == ' ' || c == '\t' || c == '\n' || c == '\r');
+        if (!isWhiteSpace)
+        {
+            // Hit non white space (this includes the terminating 0)
+            return cursor;
+        }
+    }
+}
+
+} // namespace Slang
diff --git a/source/compiler-core/slang-json-lexer.h b/source/compiler-core/slang-json-lexer.h
new file mode 100644
index 000000000..03f16d445
--- /dev/null
+++ b/source/compiler-core/slang-json-lexer.h
@@ -0,0 +1,89 @@
+// slang-json-lexer.h
+#ifndef SLANG_JSON_LEXER_H
+#define SLANG_JSON_LEXER_H
+
+#include "../core/slang-basic.h"
+
+#include "slang-source-loc.h"
+#include "slang-diagnostic-sink.h"
+
+namespace Slang {
+
+/// The kinds of token produced by JSONLexer.
+enum class JSONTokenType
+{
+ Invalid, ///< Set when lexing fails
+ IntegerLiteral, ///< A number without a fraction or exponent
+ FloatLiteral, ///< A number with a fraction and/or an exponent
+ StringLiteral, ///< A quoted string. The token covers the quotes too.
+ LBracket, ///< [
+ RBracket, ///< ]
+ LBrace, ///< {
+ RBrace, ///< }
+ Comma, ///< ,
+ Colon, ///< :
+ True, ///< The value name `true`
+ False, ///< The value name `false`
+ Null, ///< The value name `null`
+ EndOfFile, ///< The terminating 0 of the content was reached
+ CountOf, ///< Not a token - presumably the number of token types (TODO confirm use)
+};
+
+/// A token as produced by JSONLexer.
+/// The text of the token isn't stored - it's the `length` bytes of the source starting at `loc`.
+struct JSONToken
+{
+ JSONTokenType type; ///< The token type
+ SourceLoc loc; ///< Location in the source file
+ uint32_t length; ///< The length of the token in bytes
+};
+
+/// Splits JSON text into JSONTokens.
+///
+/// Call `init` before anything else. The lexer always holds a 'current' token, which can be
+/// examined with `peekToken`/`peekType`, and is replaced by calling `advance`.
+/// Before `init` is called the current token is a zero length Invalid token.
+class JSONLexer
+{
+public:
+        /// The current token
+    JSONToken& peekToken() { return m_token; }
+        /// The type of the current token
+    JSONTokenType peekType() { return m_token.type; }
+
+        /// Lexes the next token, makes it the current token, and returns its type
+    JSONTokenType advance();
+
+        /// Sets the source to lex and the sink for diagnostics, and lexes the first token.
+        /// The content of the source must be null terminated.
+    SlangResult init(SourceView* sourceView, DiagnosticSink* sink);
+
+protected:
+        /// A token type together with the cursor past the lexeme. The cursor is nullptr on failure.
+    struct LexResult
+    {
+        JSONTokenType type;
+        const char* cursor;
+    };
+
+        /// Get the location of the cursor
+    SLANG_FORCE_INLINE SourceLoc _getLoc(const char* cursor) const { return m_startLoc + (cursor - m_contentStart); }
+
+    // Each of these lexes from `cursor` and returns the cursor past what was consumed.
+    // _lexBlockComment and _lexString return nullptr (after diagnosing) on failure.
+    const char* _lexLineComment(const char* cursor);
+    const char* _lexBlockComment(const char* cursor);
+    const char* _lexWhitespace(const char* cursor);
+    const char* _lexString(const char* cursor);
+    LexResult _lexNumber(const char* cursor);
+
+        /// Sets the current token to be of `type`, spanning from m_lexemeStart up to `cursor`.
+        /// The next call to `advance` will continue from `cursor`.
+    SLANG_FORCE_INLINE JSONTokenType _setToken(JSONTokenType type, const char* cursor)
+    {
+        SLANG_ASSERT(cursor >= m_lexemeStart);
+        m_token.type = type;
+        m_token.loc = m_startLoc + (m_lexemeStart - m_contentStart);
+        m_token.length = uint32_t(cursor - m_lexemeStart);
+        m_cursor = cursor;
+        return type;
+    }
+        /// Sets the current token to be a zero length Invalid token at m_lexemeStart
+    JSONTokenType _setInvalidToken();
+
+    // Everything is given a default, so a lexer that hasn't been initialized
+    // holds well defined values.
+
+    JSONToken m_token = { JSONTokenType::Invalid, SourceLoc(), 0 };    ///< The current token
+
+    const char* m_cursor = nullptr;         ///< Where the next call to `advance` starts lexing
+    const char* m_lexemeStart = nullptr;    ///< The start of the lexeme being lexed
+
+    const char* m_contentStart = nullptr;   ///< The start of the source content
+
+    SourceLoc m_startLoc;                   ///< The location of m_contentStart
+
+    SourceView* m_sourceView = nullptr;
+    DiagnosticSink* m_sink = nullptr;       ///< Where diagnostics are reported
+};
+
+} // namespace Slang
+
+#endif
diff --git a/source/core/slang-string-escape-util.cpp b/source/core/slang-string-escape-util.cpp
index 5e4db269c..a91d88e05 100644
--- a/source/core/slang-string-escape-util.cpp
+++ b/source/core/slang-string-escape-util.cpp
@@ -87,6 +87,8 @@ SlangResult SpaceStringEscapeHandler::appendEscaped(const UnownedStringSlice& sl
}
}
+
+
// !!!!!!!!!!!!!!!!!!!!!!!!!! CppStringEscapeHandler !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
class CppStringEscapeHandler : public StringEscapeHandler
@@ -445,10 +447,380 @@ SlangResult CppStringEscapeHandler::lexQuoted(const char* cursor, const char** o
}
}
+// !!!!!!!!!!!!!!!!!!!!!!!!!! JSONStringEscapeHandler !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+/// Handles the quoting, escaping and unescaping of strings in the JSON style.
+class JSONStringEscapeHandler : public StringEscapeHandler
+{
+public:
+ typedef StringEscapeHandler Super;
+
+ // JSON strings are always quoted, whatever their contents
+ virtual bool isQuotingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE { SLANG_UNUSED(slice); return true; }
+ virtual bool isEscapingNeeded(const UnownedStringSlice& slice) SLANG_OVERRIDE;
+ virtual SlangResult appendEscaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+ virtual SlangResult appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out) SLANG_OVERRIDE;
+ virtual SlangResult lexQuoted(const char* cursor, const char** outCursor) SLANG_OVERRIDE;
+
+ // The quote character is "
+ JSONStringEscapeHandler() : Super('"') {}
+};
+
+/// Returns true if `slice` holds any character that appendEscaped would write as an escape:
+/// a quote, backslash or forward slash, or anything outside of the range ' ' to '}'.
+bool JSONStringEscapeHandler::isEscapingNeeded(const UnownedStringSlice& slice)
+{
+    const char* const end = slice.end();
+    for (const char* cur = slice.begin(); cur < end; ++cur)
+    {
+        const char c = *cur;
+
+        const bool hasShortEscape = (c == '\"' || c == '\\' || c == '/');
+        const bool isOutOfRange = (c < ' ' || c >= 0x7e);
+
+        if (hasShortEscape || isOutOfRange)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+/// Finds the end of a quoted JSON string. `cursor` points just past the opening quote, and
+/// the text must be null terminated.
+///
+/// On success writes the position just past the closing quote to `outCursor`. Returns SLANG_FAIL
+/// if the terminating 0 is hit before the closing quote, or if an escape is one that
+/// appendUnescaped wouldn't be able to decode.
+SlangResult JSONStringEscapeHandler::lexQuoted(const char* cursor, const char** outCursor)
+{
+    // We've skipped the first "
+    while (true)
+    {
+        const char c = *cursor++;
+
+        if (c == '"')
+        {
+            *outCursor = cursor;
+            return SLANG_OK;
+        }
+        if (c == 0)
+        {
+            return SLANG_FAIL;
+        }
+        if (c != '\\')
+        {
+            // Somewhat surprisingly it appears it's valid to have \r\n inside of quotes,
+            // so every other character is just taken as part of the string.
+            continue;
+        }
+
+        // It's an escape. `cursor` points at the character after the backslash.
+        switch (*cursor)
+        {
+            case '"':
+            case '\\':
+            case '/':
+            case 'b':
+            case 'f':
+            case 'n':
+            case 'r':
+            case 't':
+            {
+                ++cursor;
+                break;
+            }
+            case 'u':
+            {
+                ++cursor;
+                for (Index i = 0; i < 4; ++i)
+                {
+                    // A terminating 0 isn't a hex digit, so this can't run off the end
+                    if (!CharUtil::isHexDigit(cursor[i]))
+                    {
+                        return SLANG_FAIL;
+                    }
+                }
+                cursor += 4;
+                break;
+            }
+            default:
+            {
+                // Either the end of the text, or not a JSON escape
+                return SLANG_FAIL;
+            }
+        }
+    }
+}
+
+/// For a character with a short JSON escape, returns the character that follows the
+/// backslash in that escape. Returns 0 for every other character.
+static char _getJSONEscapedChar(char c)
+{
+    if (c == '\b') return 'b';
+    if (c == '\f') return 'f';
+    if (c == '\n') return 'n';
+    if (c == '\r') return 'r';
+    if (c == '\t') return 't';
+
+    // These are escaped as themselves
+    const bool isSelfEscaped = (c == '\\' || c == '/' || c == '"');
+    return isSelfEscaped ? c : char(0);
+}
+
+/// Given the character following the backslash of a short JSON escape, returns the character
+/// the escape stands for. Returns 0 if `c` isn't one of the short escapes.
+static char _getJSONUnescapedChar(char c)
+{
+    if (c == 'b') return '\b';
+    if (c == 'f') return '\f';
+    if (c == 'n') return '\n';
+    if (c == 'r') return '\r';
+    if (c == 't') return '\t';
+
+    // These stand for themselves
+    const bool isSelfEscaped = (c == '\\' || c == '/' || c == '"');
+    return isSelfEscaped ? c : char(0);
+}
+
+static const char s_hex[] = "0123456789abcdef";
+
+// Decodes the UTF8 encoded code point at the start of ioSlice.
+//
+// Outputs ioSlice with the chars remaining after the utf8 encoded value.
+// Returns ~uint32_t(0) if can't decode, with ioSlice set to be empty (at the end of the input).
+// It can't decode if the slice is empty, the first byte isn't a valid lead byte, the sequence
+// runs past the end of the slice, or a following byte isn't a continuation byte.
+static uint32_t _getUnicodePointFromUTF8(UnownedStringSlice& ioSlice)
+{
+    const Index length = ioSlice.getLength();
+    SLANG_ASSERT(length > 0);
+    if (length <= 0)
+    {
+        // Nothing to read (and the assert may be compiled out)
+        ioSlice = UnownedStringSlice(ioSlice.end(), ioSlice.end());
+        return ~uint32_t(0);
+    }
+
+    const char* cur = ioSlice.begin();
+
+    // Go via uint8_t so that a negative `char` doesn't get sign extended
+    const unsigned int leading = uint8_t(cur[0]);
+
+    // The number of leading 1 bits is the length of the sequence in bytes
+    unsigned int mask = 0x80;
+    Index count = 0;
+    while (leading & mask)
+    {
+        count++;
+        mask >>= 1;
+    }
+
+    if (count == 0)
+    {
+        // 7 bit ASCII is a single byte. Consume it, so the caller always makes progress.
+        ioSlice = UnownedStringSlice(cur + 1, ioSlice.end());
+        return leading;
+    }
+
+    // A count of 1 is a continuation byte without a lead byte. UTF8 has at most 4 bytes.
+    bool canDecode = (count >= 2 && count <= 4 && count <= length);
+    for (Index i = 1; canDecode && i < count; i++)
+    {
+        // Continuation bytes are of the form 10xxxxxx
+        canDecode = ((uint8_t(cur[i]) & 0xC0) == 0x80);
+    }
+
+    if (!canDecode)
+    {
+        SLANG_ASSERT(!"Can't decode");
+        ioSlice = UnownedStringSlice(ioSlice.end(), ioSlice.end());
+        return ~uint32_t(0);
+    }
+
+    // `mask` is now the first 0 bit of the lead byte - the bits below it are payload
+    uint32_t codePoint = (leading & (mask - 1));
+    for (Index i = 1; i < count; i++)
+    {
+        codePoint <<= 6;
+        codePoint += (uint8_t(cur[i]) & 0x3F);
+    }
+
+    ioSlice = UnownedStringSlice(cur + count, ioSlice.end());
+    return codePoint;
+}
+
+/// Appends the low 16 bits of `value` to `out`, as `\u` followed by 4 lower case hex digits.
+static void _appendHex16(uint32_t value, StringBuilder& out)
+{
+    char buf[6];
+    buf[0] = '\\';
+    buf[1] = 'u';
+
+    // Most significant nibble first
+    for (int i = 0; i < 4; ++i)
+    {
+        const int shift = 12 - 4 * i;
+        buf[2 + i] = s_hex[(value >> shift) & 0xf];
+    }
+
+    out.append(UnownedStringSlice(buf, 6));
+}
+
+/// Appends `slice` (taken to be UTF8) to `out` with JSON escaping applied. Quotes aren't added.
+///
+///  * Characters with a short escape (\n, \", \\ etc) use it.
+///  * UTF8 multi-byte sequences are written as \uXXXX. Code points that don't fit in 16 bits
+///    are written as a UTF16 surrogate pair (two \uXXXX in a row), as JSON requires.
+///  * Other characters below ' ' or at/above 0x7e are written as \uXXXX.
+///  * Everything else is copied as is.
+///
+/// Returns SLANG_FAIL if the slice holds UTF8 that can't be decoded. `out` may have been
+/// partially written in that case.
+SlangResult JSONStringEscapeHandler::appendEscaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    // [start, cur) is the run of characters that can go out as is, and hasn't been written yet
+    const char* start = slice.begin();
+    const char* cur = start;
+    const char*const end = slice.end();
+
+    while (cur < end)
+    {
+        const char c = *cur;
+
+        const char escapedChar = _getJSONEscapedChar(c);
+        const bool isUTF8 = (uint8_t(c) & 0x80) != 0;
+        const bool isHexEscaped = (uint8_t(c) < ' ' || (c >= 0x7e));
+
+        if (!escapedChar && !isUTF8 && !isHexEscaped)
+        {
+            // Can go out as it is
+            ++cur;
+            continue;
+        }
+
+        // Flush
+        if (start < cur)
+        {
+            out.append(start, cur);
+        }
+
+        if (escapedChar)
+        {
+            out.appendChar('\\');
+            out.appendChar(escapedChar);
+            ++cur;
+        }
+        else if (isUTF8)
+        {
+            UnownedStringSlice remainingSlice(cur, end);
+            const uint32_t codePoint = _getUnicodePointFromUTF8(remainingSlice);
+
+            // This also catches the ~0 that indicates the decode failed. Requiring the
+            // decode to have consumed something guarantees the loop terminates.
+            if (codePoint > 0x10ffff || remainingSlice.begin() <= cur)
+            {
+                return SLANG_FAIL;
+            }
+
+            if (codePoint < 0x10000)
+            {
+                _appendHex16(codePoint, out);
+            }
+            else
+            {
+                // Doesn't fit in 16 bits, so needs a high/low surrogate pair
+                const uint32_t offset = codePoint - 0x10000;
+                _appendHex16(0xd800 + (offset >> 10), out);
+                _appendHex16(0xdc00 + (offset & 0x3ff), out);
+            }
+
+            cur = remainingSlice.begin();
+        }
+        else
+        {
+            _appendHex16(uint32_t(c), out);
+            ++cur;
+        }
+
+        start = cur;
+    }
+
+    // Flush at the end
+    if (start < end)
+    {
+        out.append(start, end);
+    }
+    return SLANG_OK;
+}
+
+/// Reads the 4 hex digits (either case) starting at `cur` into `outValue`.
+/// Returns false if there are fewer than 4 characters before `end`, or any isn't a hex digit.
+static bool _parseJSONHex16(const char* cur, const char* end, uint32_t& outValue)
+{
+    if (end - cur < 4)
+    {
+        return false;
+    }
+
+    uint32_t value = 0;
+    for (Index i = 0; i < 4; ++i)
+    {
+        const char digitC = cur[i];
+
+        uint32_t digitValue;
+        if (digitC >= '0' && digitC <= '9')
+        {
+            digitValue = digitC - '0';
+        }
+        else if (digitC >= 'a' && digitC <= 'f')
+        {
+            digitValue = digitC - 'a' + 10;
+        }
+        else if (digitC >= 'A' && digitC <= 'F')
+        {
+            digitValue = digitC - 'A' + 10;
+        }
+        else
+        {
+            return false;
+        }
+        value = (value << 4) | digitValue;
+    }
+
+    outValue = value;
+    return true;
+}
+
+/// Appends `slice` - the contents of a JSON string, without the quotes - to `out` with the
+/// escapes decoded. \uXXXX escapes are written as UTF8, and a high surrogate immediately
+/// followed by a low surrogate is combined into the single code point the pair encodes.
+///
+/// Returns SLANG_FAIL if an escape is truncated, unknown, or has bad hex digits. `out` may
+/// have been partially written in that case.
+SlangResult JSONStringEscapeHandler::appendUnescaped(const UnownedStringSlice& slice, StringBuilder& out)
+{
+    // [start, cur) is the run of characters that can go out as is, and hasn't been written yet
+    const char* start = slice.begin();
+    const char* cur = start;
+    const char*const end = slice.end();
+
+    while (cur < end)
+    {
+        if (*cur != '\\')
+        {
+            ++cur;
+            continue;
+        }
+
+        // Flush
+        if (start < cur)
+        {
+            out.append(start, cur);
+        }
+
+        // Skip the backslash
+        ++cur;
+        if (cur >= end)
+        {
+            return SLANG_FAIL;
+        }
+
+        const char escapeC = *cur++;
+
+        if (escapeC == 'u')
+        {
+            uint32_t value = 0;
+            if (!_parseJSONHex16(cur, end, value))
+            {
+                return SLANG_FAIL;
+            }
+            cur += 4;
+
+            // A high surrogate directly followed by an escaped low surrogate is one code point.
+            // A surrogate that isn't part of such a pair is encoded as it stands.
+            const bool isHighSurrogate = (value >= 0xd800 && value <= 0xdbff);
+            if (isHighSurrogate && end - cur >= 6 && cur[0] == '\\' && cur[1] == 'u')
+            {
+                uint32_t low = 0;
+                if (_parseJSONHex16(cur + 2, end, low) && low >= 0xdc00 && low <= 0xdfff)
+                {
+                    value = 0x10000 + ((value - 0xd800) << 10) + (low - 0xdc00);
+                    cur += 6;
+                }
+            }
+
+            // Need to encode in UTF8 to concat
+            char buf[8];
+            const int len = EncodeUnicodePointToUTF8(buf, value);
+
+            out.append(buf, buf + len);
+        }
+        else
+        {
+            const char unescapedChar = _getJSONUnescapedChar(escapeC);
+            if (unescapedChar == 0)
+            {
+                // Don't know how to unescape that char
+                return SLANG_FAIL;
+            }
+            out.appendChar(unescapedChar);
+        }
+
+        start = cur;
+    }
+
+    // Flush
+    if (start < end)
+    {
+        out.append(start, end);
+    }
+
+    return SLANG_OK;
+}
+
// !!!!!!!!!!!!!!!!!!!!!!!!!! StringEscapeUtil !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
static CppStringEscapeHandler g_cppHandler;
static SpaceStringEscapeHandler g_spaceHandler;
+static JSONStringEscapeHandler g_jsonHandler;
StringEscapeUtil::Handler* StringEscapeUtil::getHandler(Style style)
{
@@ -456,6 +828,7 @@ StringEscapeUtil::Handler* StringEscapeUtil::getHandler(Style style)
{
case Style::Cpp: return &g_cppHandler;
case Style::Space: return &g_spaceHandler;
+ case Style::JSON: return &g_jsonHandler;
default: return nullptr;
}
}
diff --git a/source/core/slang-string-escape-util.h b/source/core/slang-string-escape-util.h
index 9dc653df3..c3039eb47 100644
--- a/source/core/slang-string-escape-util.h
+++ b/source/core/slang-string-escape-util.h
@@ -51,6 +51,7 @@ struct StringEscapeUtil
{
Cpp, ///< Cpp style quoting and escape handling
Space, ///< Applies quotes if there are spaces. Does not escape.
+ JSON, ///< Json encoding
};
/// Given a style returns a handler
diff --git a/tools/slang-test/unit-test-json.cpp b/tools/slang-test/unit-test-json.cpp
new file mode 100644
index 000000000..fff16b136
--- /dev/null
+++ b/tools/slang-test/unit-test-json.cpp
@@ -0,0 +1,180 @@
+
+#include "../../source/compiler-core/slang-json-lexer.h"
+#include "../../source/core/slang-string-escape-util.h"
+
+#include "test-context.h"
+
+using namespace Slang;
+
+namespace { // anonymous
+
+/// An expected token - its type, and the text it covers in the source.
+struct Element
+{
+ JSONTokenType type;
+ const char* value;
+};
+
+} // anonymous
+
+/// Lexes the JSON text `in`, adding all of the tokens to `toks`.
+///
+/// The last token added is either the EndOfFile token, or the first Invalid token, in which
+/// case SLANG_FAIL is returned. Also fails if the lexer can't be initialized, or if advancing
+/// from the end of file doesn't stay at end of file.
+static SlangResult _lex(const char* in, DiagnosticSink* sink, List<JSONToken>& toks)
+{
+    SourceManager* sourceManager = sink->getSourceManager();
+
+    String contents(in);
+    SourceFile* sourceFile = sourceManager->createSourceFileWithString(PathInfo::makeUnknown(), contents);
+    SourceView* sourceView = sourceManager->createSourceView(sourceFile, nullptr, SourceLoc());
+
+    JSONLexer lexer;
+
+    const SlangResult initRes = lexer.init(sourceView, sink);
+    if (!SLANG_SUCCEEDED(initRes))
+    {
+        return initRes;
+    }
+
+    while (true)
+    {
+        // Every token is recorded, including the one that ends the loop
+        toks.add(lexer.peekToken());
+
+        const JSONTokenType type = lexer.peekType();
+        if (type == JSONTokenType::Invalid)
+        {
+            return SLANG_FAIL;
+        }
+        if (type == JSONTokenType::EndOfFile)
+        {
+            break;
+        }
+
+        lexer.advance();
+    }
+
+    // If we advance from end of file we should still be at EndOfFile.
+    // The call is kept out of an assert, as an assert can be compiled out.
+    const JSONTokenType afterEnd = lexer.advance();
+    if (afterEnd != JSONTokenType::EndOfFile)
+    {
+        return SLANG_FAIL;
+    }
+
+    return SLANG_OK;
+}
+
+/// Returns true if `toks` matches the `elesCount` expected elements in `eles` - the same
+/// number of them, and each token having the expected type and covering the expected text.
+/// All the tokens are taken to come from the source view that holds the first token.
+static bool _areEqual(SourceManager* sourceManager, const List<JSONToken>& toks, const Element* eles, Index elesCount)
+{
+    const Index count = toks.getCount();
+    if (count != elesCount)
+    {
+        return false;
+    }
+    if (count == 0)
+    {
+        // Both are empty
+        return true;
+    }
+
+    SourceView* sourceView = sourceManager->findSourceView(toks[0].loc);
+    if (sourceView == nullptr)
+    {
+        // Without the source the text of the tokens can't be checked
+        return false;
+    }
+
+    const char*const content = sourceView->getContent().begin();
+
+    for (Index i = 0; i < count; ++i)
+    {
+        const JSONToken& tok = toks[i];
+        const auto& ele = eles[i];
+
+        if (tok.type != ele.type)
+        {
+            return false;
+        }
+
+        // The location has to be in the view for the offset to make sense
+        if (!sourceView->getRange().contains(tok.loc))
+        {
+            return false;
+        }
+
+        const char* start = content + sourceView->getRange().getOffset(tok.loc);
+
+        UnownedStringSlice lexeme(start, tok.length);
+
+        if (lexeme != ele.value)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/// Tests the JSON lexer, and the JSON string escape handler.
+///
+///  1. Lexes a small document and checks the type and text of every token.
+///  2. Checks a string of characters with short escapes survives being escaped and unescaped.
+///  3. Checks a sequence of \uXXXX escapes survives being unescaped and escaped.
+static void jsonUnitTest()
+{
+    SourceManager sourceManager;
+    sourceManager.initialize(nullptr, nullptr);
+    DiagnosticSink sink(&sourceManager, nullptr);
+
+    {
+        const char text[] = " { \"Hello\" : [ \"World\", 1, 2.0, -3.0, -435.5345435, 45e-10, 421.00e+20, 17e1] }";
+
+        const Element eles[] =
+        {
+            {JSONTokenType::LBrace, "{" },
+            {JSONTokenType::StringLiteral, "\"Hello\""},
+            {JSONTokenType::Colon, ":" },
+            {JSONTokenType::LBracket, "[" },
+            {JSONTokenType::StringLiteral, "\"World\"" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::IntegerLiteral, "1" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "2.0" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "-3.0" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "-435.5345435" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "45e-10" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "421.00e+20" },
+            {JSONTokenType::Comma, "," },
+            {JSONTokenType::FloatLiteral, "17e1" },
+            {JSONTokenType::RBracket, "]" },
+            {JSONTokenType::RBrace, "}" },
+            {JSONTokenType::EndOfFile, "" },
+        };
+
+        List<JSONToken> toks;
+        SLANG_CHECK(SLANG_SUCCEEDED(_lex(text, &sink, toks)));
+
+        SLANG_CHECK(_areEqual(&sourceManager, toks, eles, SLANG_COUNT_OF(eles)));
+    }
+
+    {
+        StringEscapeHandler* handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::JSON);
+        SLANG_CHECK(handler != nullptr);
+        if (handler == nullptr)
+        {
+            // Nothing else can be tested without the handler
+            return;
+        }
+
+        {
+            const auto slice = UnownedStringSlice::fromLiteral("\n\r\b\f\t \"\\/ Some text...");
+
+            SLANG_CHECK(handler->isEscapingNeeded(slice));
+            SLANG_CHECK(!handler->isEscapingNeeded(UnownedStringSlice::fromLiteral("Hello!")));
+
+            // The results are checked, so a failure isn't hidden behind the compare
+            StringBuilder escaped;
+            SLANG_CHECK(SLANG_SUCCEEDED(handler->appendEscaped(slice, escaped)));
+
+            StringBuilder unescaped;
+            SLANG_CHECK(SLANG_SUCCEEDED(handler->appendUnescaped(escaped.getUnownedSlice(), unescaped)));
+
+            SLANG_CHECK(unescaped == slice);
+        }
+
+        {
+            // Doubling from 0x7f gives values that cover 1, 2 and 3 byte UTF8 encodings
+            uint32_t v = 0x7f;
+
+            StringBuilder buf;
+            while (v < 0x10000)
+            {
+                char work[10] = "\\u";
+
+                // Write v as 4 hex digits, most significant first
+                for (Int i = 0; i < 4; ++i)
+                {
+                    const uint32_t digitValue = (v >> ((3 - i) * 4)) & 0xf;
+
+                    char digitC = (digitValue > 9) ? char(digitValue - 10 + 'a') : char(digitValue + '0');
+                    work[i + 2] = digitC;
+                }
+
+                buf << UnownedStringSlice(work, 6);
+
+                v += v;
+            }
+
+            // Decode it
+            StringBuilder unescaped;
+            SLANG_CHECK(SLANG_SUCCEEDED(handler->appendUnescaped(buf.getUnownedSlice(), unescaped)));
+
+            // Encode it
+            StringBuilder escaped;
+            SLANG_CHECK(SLANG_SUCCEEDED(handler->appendEscaped(unescaped.getUnownedSlice(), escaped)));
+
+            SLANG_CHECK(escaped == buf);
+        }
+    }
+}
+
+SLANG_UNIT_TEST("JSON", jsonUnitTest);