diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2021-05-27 15:38:52 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-05-27 12:38:52 -0700 |
| commit | 15f5cffc3026a5faed7046ae7d75ec6b56cf4a4c (patch) | |
| tree | e6c59288e453ac3eb788068a155e7aa607399009 /source/compiler-core | |
| parent | 969943f4b751d3cad8ac548f9cf0f65406935bad (diff) | |
JSON Parser and Writer (#1859)
* #include an absolute path didn't work - because paths were taken to always be relative.
* WIP JSONWriter/JSONParser.
* Checking different Layout styles for JSON.
* Add slang-json-parser.h/.cpp
Diffstat (limited to 'source/compiler-core')
| -rw-r--r-- | source/compiler-core/slang-json-diagnostic-defs.h | 2 | ||||
| -rw-r--r-- | source/compiler-core/slang-json-lexer.cpp | 64 | ||||
| -rw-r--r-- | source/compiler-core/slang-json-lexer.h | 22 | ||||
| -rw-r--r-- | source/compiler-core/slang-json-parser.cpp | 384 | ||||
| -rw-r--r-- | source/compiler-core/slang-json-parser.h | 188 |
5 files changed, 659 insertions, 1 deletions
diff --git a/source/compiler-core/slang-json-diagnostic-defs.h b/source/compiler-core/slang-json-diagnostic-defs.h index a4b260857..da3b2a28c 100644 --- a/source/compiler-core/slang-json-diagnostic-defs.h +++ b/source/compiler-core/slang-json-diagnostic-defs.h @@ -33,5 +33,7 @@ DIAGNOSTIC(20003, Error, endOfFileInComment, "end of file in comment") DIAGNOSTIC(20004, Error, expectingAHexDigit, "expecting a hex digit") DIAGNOSTIC(20005, Error, expectingADigit, "expecting a digit") DIAGNOSTIC(20006, Error, expectingValueName, "expecting value name [null, true, false]") +DIAGNOSTIC(20007, Error, unexpectedTokenExpectedTokenType, "unexpected '$0', expected '$1'") +DIAGNOSTIC(20008, Error, unexpectedToken, "unexpected '$0'") #undef DIAGNOSTIC diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp index 19a5b29a7..261d5f413 100644 --- a/source/compiler-core/slang-json-lexer.cpp +++ b/source/compiler-core/slang-json-lexer.cpp @@ -45,6 +45,47 @@ JSONTokenType JSONLexer::_setInvalidToken() return _setToken(JSONTokenType::Invalid, m_lexemeStart); } +SlangResult JSONLexer::expect(JSONTokenType type) /* Advance past the current token when it has the given type; otherwise report unexpectedTokenExpectedTokenType to m_sink and return SLANG_FAIL without advancing. */ +{ + if (type != peekType()) + { + m_sink->diagnose(m_token.loc, JSONDiagnostics::unexpectedTokenExpectedTokenType, getJSONTokenAsText(peekType()), getJSONTokenAsText(type)); + return SLANG_FAIL; + } + + advance(); + return SLANG_OK; +} + +SlangResult JSONLexer::expect(JSONTokenType type, JSONToken& out) /* Same check as the one-argument overload, but copies the matched token into out before advancing. */ +{ + if (type != peekType()) + { + m_sink->diagnose(m_token.loc, JSONDiagnostics::unexpectedTokenExpectedTokenType, getJSONTokenAsText(peekType()), getJSONTokenAsText(type)); + return SLANG_FAIL; + } + + out = m_token; + advance(); + return SLANG_OK; +} + +bool JSONLexer::advanceIf(JSONTokenType type) /* Advance and return true only when the current token has the given type. */ +{ + if (type == peekType()) + { + advance(); + return true; + } + return false; +} + +UnownedStringSlice JSONLexer::getLexeme(const JSONToken& tok) const /* Slice of the source view content that starts at the offset of tok.loc and is tok.length bytes long. */ +{ + auto offset = m_sourceView->getRange().getOffset(tok.loc); + return 
UnownedStringSlice(m_sourceView->getContent().begin() + offset, tok.length); +} + JSONTokenType JSONLexer::advance() { const char* cursor = m_cursor; @@ -382,4 +423,27 @@ const char* JSONLexer::_lexWhitespace(const char* cursor) } } +UnownedStringSlice getJSONTokenAsText(JSONTokenType type) /* Text used for a token type in diagnostics; a type missing from the switch reaches SLANG_UNEXPECTED. */ +{ + switch (type) + { + case JSONTokenType::Invalid: return UnownedStringSlice::fromLiteral("invalid"); + case JSONTokenType::IntegerLiteral: return UnownedStringSlice::fromLiteral("integer literal"); + case JSONTokenType::FloatLiteral: return UnownedStringSlice::fromLiteral("float literal"); + case JSONTokenType::StringLiteral: return UnownedStringSlice::fromLiteral("string literal"); + case JSONTokenType::LBracket: return UnownedStringSlice::fromLiteral("["); + case JSONTokenType::RBracket: return UnownedStringSlice::fromLiteral("]"); + case JSONTokenType::LBrace: return UnownedStringSlice::fromLiteral("{"); + case JSONTokenType::RBrace: return UnownedStringSlice::fromLiteral("}"); + case JSONTokenType::Comma: return UnownedStringSlice::fromLiteral(","); + case JSONTokenType::Colon: return UnownedStringSlice::fromLiteral(":"); + case JSONTokenType::True: return UnownedStringSlice::fromLiteral("true"); + case JSONTokenType::False: return UnownedStringSlice::fromLiteral("false"); + case JSONTokenType::Null: return UnownedStringSlice::fromLiteral("null"); + case JSONTokenType::EndOfFile: return UnownedStringSlice::fromLiteral("end of file"); + default: break; + } + SLANG_UNEXPECTED("JSONTokenType not known"); +} + } // namespace Slang diff --git a/source/compiler-core/slang-json-lexer.h b/source/compiler-core/slang-json-lexer.h index 03f16d445..ee4b60f75 100644 --- a/source/compiler-core/slang-json-lexer.h +++ b/source/compiler-core/slang-json-lexer.h @@ -35,14 +35,34 @@ struct JSONToken uint32_t length; ///< The length of the token in bytes }; +UnownedStringSlice getJSONTokenAsText(JSONTokenType type); + class JSONLexer { public: + /// Peek the current token JSONToken& 
peekToken() { return m_token; } + /// Peek the current type JSONTokenType peekType() { return m_token.type; } - + /// Peek the current SourceLoc + SourceLoc peekLoc() { return m_token.loc; } + + /// Get the lexeme of JSONToken + UnownedStringSlice getLexeme(const JSONToken& tok) const; + /// Peek the lexeme at the current position + UnownedStringSlice peekLexeme() const { return getLexeme(m_token); } + JSONTokenType advance(); + /// Expects a token of type type. If found advances, if not returns an error and outputs to diagnostic sink + SlangResult expect(JSONTokenType type); + /// Same as expect except out will hold the token. + SlangResult expect(JSONTokenType type, JSONToken& out); + + /// Returns true and advances if current token is type + bool advanceIf(JSONTokenType type); + + /// Must be called before use SlangResult init(SourceView* sourceView, DiagnosticSink* sink); protected: diff --git a/source/compiler-core/slang-json-parser.cpp b/source/compiler-core/slang-json-parser.cpp new file mode 100644 index 000000000..478b02fb8 --- /dev/null +++ b/source/compiler-core/slang-json-parser.cpp @@ -0,0 +1,384 @@ +// slang-json-parser.cpp +#include "slang-json-parser.h" + +#include "slang-json-diagnostics.h" + +/* +https://www.json.org/json-en.html +*/ + +namespace Slang { + +SlangResult JSONParser::_parseObject() /* Parse an object: LBrace, then either RBrace or comma separated key-colon-value entries, then RBrace. The listener receives startObject, addLexemeKey per key, and endObject. */ +{ + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::LBrace)); + + m_listener->startObject(); + + if (m_lexer->advanceIf(JSONTokenType::RBrace)) + { + m_listener->endObject(); + return SLANG_OK; + } + + while (true) + { + JSONToken keyToken; + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::StringLiteral, keyToken)); + m_listener->addLexemeKey(m_lexer->getLexeme(keyToken)); + + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::Colon)); + + SLANG_RETURN_ON_FAIL(_parseValue()); + if (m_lexer->advanceIf(JSONTokenType::Comma)) + { + continue; + } + + break; + } + + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::RBrace)); + 
m_listener->endObject(); + return SLANG_OK; +} + +SlangResult JSONParser::_parseArray() /* Parse an array: LBracket, then either RBracket or comma separated values, then RBracket. The listener receives startArray and endArray. */ +{ + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::LBracket)); + + m_listener->startArray(); + + if (m_lexer->advanceIf(JSONTokenType::RBracket)) + { + m_listener->endArray(); + return SLANG_OK; + } + + while (true) + { + SLANG_RETURN_ON_FAIL(_parseValue()); + if (m_lexer->advanceIf(JSONTokenType::Comma)) + { + continue; + } + break; + } + + SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::RBracket)); + m_listener->endArray(); + return SLANG_OK; +} + +SlangResult JSONParser::_parseValue() /* Dispatch on the current token: literals go to addLexemeValue, LBracket and LBrace recurse, Invalid fails without a new diagnostic, anything else is diagnosed as unexpectedToken. */ +{ + switch (m_lexer->peekType()) + { + case JSONTokenType::True: + case JSONTokenType::False: + case JSONTokenType::Null: + case JSONTokenType::IntegerLiteral: + case JSONTokenType::FloatLiteral: + case JSONTokenType::StringLiteral: + { + m_listener->addLexemeValue(m_lexer->peekType(), m_lexer->peekLexeme()); + m_lexer->advance(); + return SLANG_OK; + } + case JSONTokenType::LBracket: + { + return _parseArray(); + } + case JSONTokenType::LBrace: + { + return _parseObject(); + } + default: + { + m_sink->diagnose(m_lexer->peekLoc(), JSONDiagnostics::unexpectedToken, getJSONTokenAsText(m_lexer->peekType())); + return SLANG_FAIL; + } + case JSONTokenType::Invalid: + { + // It's a lex error, so just fail + return SLANG_FAIL; + } + } +} + +SlangResult JSONParser::parse(JSONLexer* lexer, SourceView* sourceView, JSONListener* listener, DiagnosticSink* sink) /* Record the collaborators, parse a single value, then require EndOfFile. */ +{ + m_sourceView = sourceView; + m_lexer = lexer; + m_listener = listener; + m_sink = sink; + + SLANG_RETURN_ON_FAIL(_parseValue()); + + return m_lexer->expect(JSONTokenType::EndOfFile); +} + +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + JSONWriter + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
*/ + +Index JSONWriter::_getLineLengthAfterIndent() /* Length of the current line excluding the emitted indent; 0 when no indent has been emitted on this line yet. */ +{ + if (m_emittedIndent < 0) + { + return 0; + } + + Index lineLength = m_builder.getLength() - m_lineStart; + return lineLength - m_emittedIndent * m_indentCharCount; +} + + +void JSONWriter::_emitIndent() +{ + m_builder.appendRepeatedChar(m_indentChar, m_currentIndent * m_indentCharCount); + m_emittedIndent = m_currentIndent; + SLANG_ASSERT(m_emittedIndent >= 0); +} + +void JSONWriter::_maybeEmitIndent() +{ + if (m_emittedIndent < 0) + { + _emitIndent(); + } +} + +void JSONWriter::_nextLine() /* Emit a newline, record where the new line starts, and mark that no indent has been emitted on it yet. */ +{ + m_builder << "\n"; + m_lineStart = m_builder.getLength(); + m_lineIndex++; + m_emittedIndent = -1; +} + +void JSONWriter::_maybeNextLine() +{ + // Nothing has been emitted, because nothing has been indented, and we must indent before an emit + if (m_emittedIndent < 0) + { + } + else + { + _nextLine(); + } +} + +void JSONWriter::_handleFormat(Location loc) /* Apply the line break and indent or dedent policy of m_format for the given location. */ +{ + switch (m_format) + { + case IndentationStyle::Allman: + { + if (isComma(loc)) + { + _maybeNextLine(); + } + else + { + if (isBefore(loc)) + { + _maybeNextLine(); + if (isClose(loc)) + { + _dedent(); + } + } + else + { + _maybeNextLine(); + if (isOpen(loc)) + { + _indent(); + } + } + } + break; + } + case IndentationStyle::KNR: + { + if (isComma(loc)) + { + if (loc == Location::FieldComma || + (m_lineLengthLimit > 0 && _getLineLengthAfterIndent() > m_lineLengthLimit)) + { + _maybeNextLine(); + } + } + else + { + if (isBefore(loc)) + { + if (isClose(loc)) + { + _maybeNextLine(); + _dedent(); + } + } + else + { + _maybeNextLine(); + if (isOpen(loc)) + { + _indent(); + } + } + } + break; + } + } +} + +void JSONWriter::_maybeEmitComma() +{ + if (m_state.m_flags & State::Flag::HasPrevious) + { + _maybeEmitIndent(); + m_builder << ", "; + _handleFormat(Location::Comma); + } +} + +void JSONWriter::_maybeEmitFieldComma() +{ + if (m_state.m_flags & State::Flag::HasPrevious) + { + _maybeEmitIndent(); + m_builder << ", "; + _handleFormat(Location::FieldComma); + } +} + 
+void JSONWriter::startObject() +{ + SLANG_ASSERT(m_state.canEmitValue()); + + _maybeEmitComma(); + + _handleFormat(Location::BeforeOpenObject); + _maybeEmitIndent(); + m_builder << "{"; + _handleFormat(Location::AfterOpenObject); + + m_state.m_flags |= State::Flag::HasPrevious; + m_state.m_flags &= State::Flag::HasKey; + + m_stack.add(m_state); + + m_state.m_kind = State::Kind::Object; + m_state.m_flags = 0; +} + +void JSONWriter::endObject() +{ + SLANG_ASSERT(m_state.m_kind == State::Kind::Object); + + _handleFormat(Location::BeforeCloseObject); + _maybeEmitIndent(); + m_builder << "}"; + _handleFormat(Location::AfterCloseObject); + + m_state = m_stack.getLast(); + m_stack.removeLast(); +} + +void JSONWriter::startArray() +{ + SLANG_ASSERT(m_state.canEmitValue()); + + _maybeEmitComma(); + + _handleFormat(Location::BeforeOpenArray); + _maybeEmitIndent(); + m_builder << "["; + _handleFormat(Location::AfterOpenArray); + + m_state.m_flags |= State::Flag::HasPrevious; + m_state.m_flags &= State::Flag::HasKey; + + m_stack.add(m_state); + + m_state.m_kind = State::Kind::Array; + m_state.m_flags = 0; +} + +void JSONWriter::endArray() +{ + SLANG_ASSERT(m_state.m_kind == State::Kind::Array); + + _handleFormat(Location::BeforeCloseArray); + _maybeEmitIndent(); + m_builder << "]"; + _handleFormat(Location::AfterCloseArray); + + m_state = m_stack.getLast(); + m_stack.removeLast(); +} + +void JSONWriter::addLexemeKey(const UnownedStringSlice& key) +{ + SLANG_ASSERT(m_state.m_kind == State::Kind::Object && (m_state.m_flags & State::Flag::HasKey) == 0); + + _maybeEmitFieldComma(); + + // It should be quoted + SLANG_ASSERT(key.getLength() >= 2 && key[0] == '"' && key[key.getLength() - 1] == '"'); + + _maybeEmitIndent(); + m_builder << key << " : "; + + m_state.m_flags |= State::Flag::HasKey; + // We don't want it to emit a , after the : + m_state.m_flags &= ~State::Flag::HasPrevious; +} + +void JSONWriter::addLexemeValue(JSONTokenType type, const UnownedStringSlice& value) +{ + 
SLANG_ASSERT(m_state.canEmitValue()); + + _maybeEmitComma(); + _maybeEmitIndent(); + + switch (type) + { + case JSONTokenType::IntegerLiteral: + case JSONTokenType::FloatLiteral: + case JSONTokenType::StringLiteral: + { + m_builder << value; + break; + } + case JSONTokenType::True: + { + m_builder << UnownedStringSlice::fromLiteral("true"); + break; + } + case JSONTokenType::False: + { + m_builder << UnownedStringSlice::fromLiteral("false"); + break; + } + case JSONTokenType::Null: + { + m_builder << UnownedStringSlice::fromLiteral("null"); + break; + } + default: + { + SLANG_ASSERT(!"Can only emit values"); + } + } + // We have a previous + m_state.m_flags |= State::Flag::HasPrevious; + // We don't have a key + m_state.m_flags &= ~State::Flag::HasKey; +} + +} // namespace Slang diff --git a/source/compiler-core/slang-json-parser.h b/source/compiler-core/slang-json-parser.h new file mode 100644 index 000000000..2e907abb2 --- /dev/null +++ b/source/compiler-core/slang-json-parser.h @@ -0,0 +1,188 @@ +// slang-json-parser.h +#ifndef SLANG_JSON_PARSER_H +#define SLANG_JSON_PARSER_H + +#include "slang-json-lexer.h" + + +namespace Slang { + +class JSONListener +{ +public: + /// Start an object + virtual void startObject() = 0; + /// End an object + virtual void endObject() = 0; + /// Start an array + virtual void startArray() = 0; + /// End and array + virtual void endArray() = 0; + + /// Add the key lexeme. Must be followed by addLexemeValue. + virtual void addLexemeKey(const UnownedStringSlice& key) = 0; + /// Can be performed in an array or after an addLexemeKey in an object + virtual void addLexemeValue(JSONTokenType type, const UnownedStringSlice& value) = 0; +}; + +class JSONWriter : public JSONListener +{ +public: + /* + https://en.wikipedia.org/wiki/Indentation_style + */ + enum class IndentationStyle + { + Allman, ///< After every value, and opening, closing all other types + KNR, ///< K&R like. Fields have CR. 
+ }; + + enum class LocationType : uint8_t + { + Object, + Array, + Comma, + }; + + // NOTE! Order must be kept the same without fixing is functions below + enum class Location + { + BeforeOpenObject, + BeforeCloseObject, + AfterOpenObject, + AfterCloseObject, + + BeforeOpenArray, + BeforeCloseArray, + AfterOpenArray, + AfterCloseArray, + + FieldComma, + Comma, + + CountOf, + }; + + static LocationType getLocationType(Location loc) { return isObject(loc) ? LocationType::Object : (isComma(loc) ? LocationType::Comma : LocationType::Array); } + + static bool isObjectLike(Location loc) { return Index(loc) <= Index(Location::AfterCloseArray); } + static bool isObject(Location loc) { return Index(loc) <= Index(Location::AfterCloseObject); } + static bool isArray(Location loc) { return Index(loc) >= Index(Location::BeforeOpenArray) && Index(loc) <= Index(Location::AfterCloseArray); } + static bool isComma(Location loc) { return Index(loc) >= Index(Location::FieldComma); } + static bool isOpen(Location loc) { return isObjectLike(loc) && (Index(loc) & 1) == 0; } + static bool isClose(Location loc) { return isObjectLike(loc) && (Index(loc) & 1) != 0; } + static bool isBefore(Location loc) { return isObjectLike(loc) && (Index(loc) & 2) == 0; } + static bool isAfter(Location loc) { return isObjectLike(loc) && (Index(loc) & 2) != 0; } + + // Implement JSONListener + virtual void startObject() SLANG_OVERRIDE; + virtual void endObject() SLANG_OVERRIDE; + virtual void startArray() SLANG_OVERRIDE; + virtual void endArray() SLANG_OVERRIDE; + virtual void addLexemeKey(const UnownedStringSlice& key) SLANG_OVERRIDE; + virtual void addLexemeValue(JSONTokenType type, const UnownedStringSlice& value) SLANG_OVERRIDE; + + /// Get the builder + StringBuilder& getBuilder() { return m_builder; } + + JSONWriter(IndentationStyle format, Index lineLengthLimit = -1) + { + m_format = format; + m_lineLengthLimit = lineLengthLimit; + + m_state.m_kind = State::Kind::Root; + m_state.m_flags = 0; + } + 
+protected: + struct State + { + enum class Kind : uint8_t + { + Root, + Object, + Array, + }; + + typedef uint8_t Flags; + struct Flag + { + enum Enum : Flags + { + HasPrevious = 0x01, + HasKey = 0x02, + }; + }; + + bool canEmitValue() const + { + switch (m_kind) + { + case Kind::Root: return (m_flags & Flag::HasPrevious) == 0; + case Kind::Array: return true; + case Kind::Object: return (m_flags & Flag::HasKey) != 0; + default: return false; + } + } + + Kind m_kind; + Flags m_flags; + }; + + void _maybeNextLine(); + void _nextLine(); + void _handleFormat(Location loc); + + Index _getLineLengthAfterIndent(); + + /// Only emits the indent if at start of line + void _maybeEmitIndent(); + void _emitIndent(); + + void _maybeEmitComma(); + void _maybeEmitFieldComma(); + + void _indent() { m_currentIndent++; } + void _dedent() { --m_currentIndent; SLANG_ASSERT(m_currentIndent >= 0); } + + /// True if the line is indented at the required level + bool _hasIndent() { return m_emittedIndent >= 0 && m_emittedIndent == m_currentIndent; } + + Index m_currentIndent = 0; + char m_indentChar = ' '; + Index m_indentCharCount = 4; + + Index m_lineIndex = 0; + Index m_lineStart = 0; + Index m_emittedIndent = -1; /// If -1 for current line there is no indent emitted + + Index m_lineLengthLimit = -1; /// The limit is only applied *AFTER* indentation + + IndentationStyle m_format; + + StringBuilder m_builder; + List<State> m_stack; + State m_state; +}; + +class JSONParser +{ +public: + SlangResult parse(JSONLexer* lexer, SourceView* sourceView, JSONListener* listener, DiagnosticSink* sink); + +protected: + SlangResult _parseValue(); + SlangResult _parseObject(); + SlangResult _parseArray(); + + SourceView* m_sourceView; + DiagnosticSink* m_sink; + JSONListener* m_listener; + JSONLexer* m_lexer; +}; + + + +} // namespace Slang + +#endif |
