summaryrefslogtreecommitdiffstats
path: root/source/compiler-core
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2021-05-27 15:38:52 -0400
committerGitHub <noreply@github.com>2021-05-27 12:38:52 -0700
commit15f5cffc3026a5faed7046ae7d75ec6b56cf4a4c (patch)
treee6c59288e453ac3eb788068a155e7aa607399009 /source/compiler-core
parent969943f4b751d3cad8ac548f9cf0f65406935bad (diff)
JSON Parser and Writer (#1859)
* #include an absolute path didn't work - because paths were taken to always be relative. * WIP JSONWriter/JSONParser. * Checking different Layout styles for JSON. * Add slang-json-parser.h/.cpp
Diffstat (limited to 'source/compiler-core')
-rw-r--r--source/compiler-core/slang-json-diagnostic-defs.h2
-rw-r--r--source/compiler-core/slang-json-lexer.cpp64
-rw-r--r--source/compiler-core/slang-json-lexer.h22
-rw-r--r--source/compiler-core/slang-json-parser.cpp384
-rw-r--r--source/compiler-core/slang-json-parser.h188
5 files changed, 659 insertions, 1 deletions
diff --git a/source/compiler-core/slang-json-diagnostic-defs.h b/source/compiler-core/slang-json-diagnostic-defs.h
index a4b260857..da3b2a28c 100644
--- a/source/compiler-core/slang-json-diagnostic-defs.h
+++ b/source/compiler-core/slang-json-diagnostic-defs.h
@@ -33,5 +33,7 @@ DIAGNOSTIC(20003, Error, endOfFileInComment, "end of file in comment")
DIAGNOSTIC(20004, Error, expectingAHexDigit, "expecting a hex digit")
DIAGNOSTIC(20005, Error, expectingADigit, "expecting a digit")
DIAGNOSTIC(20006, Error, expectingValueName, "expecting value name [null, true, false]")
+DIAGNOSTIC(20007, Error, unexpectedTokenExpectedTokenType, "unexpected '$0', expected '$1'")
+DIAGNOSTIC(20008, Error, unexpectedToken, "unexpected '$0'")
#undef DIAGNOSTIC
diff --git a/source/compiler-core/slang-json-lexer.cpp b/source/compiler-core/slang-json-lexer.cpp
index 19a5b29a7..261d5f413 100644
--- a/source/compiler-core/slang-json-lexer.cpp
+++ b/source/compiler-core/slang-json-lexer.cpp
@@ -45,6 +45,47 @@ JSONTokenType JSONLexer::_setInvalidToken()
return _setToken(JSONTokenType::Invalid, m_lexemeStart);
}
+/// Consume the current token if it is of `type`.
+/// On a mismatch a diagnostic naming both the found and the expected token is
+/// sent to the sink, nothing is consumed and SLANG_FAIL is returned.
+SlangResult JSONLexer::expect(JSONTokenType type)
+{
+    const JSONTokenType found = peekType();
+    if (found == type)
+    {
+        advance();
+        return SLANG_OK;
+    }
+
+    m_sink->diagnose(m_token.loc, JSONDiagnostics::unexpectedTokenExpectedTokenType, getJSONTokenAsText(found), getJSONTokenAsText(type));
+    return SLANG_FAIL;
+}
+
+/// Same as expect(type), but on success the matched token is copied into `out`
+/// before the lexer advances. `out` is left untouched on failure.
+SlangResult JSONLexer::expect(JSONTokenType type, JSONToken& out)
+{
+    const JSONTokenType found = peekType();
+    if (found == type)
+    {
+        // Copy first: advance() replaces the current token
+        out = m_token;
+        advance();
+        return SLANG_OK;
+    }
+
+    m_sink->diagnose(m_token.loc, JSONDiagnostics::unexpectedTokenExpectedTokenType, getJSONTokenAsText(found), getJSONTokenAsText(type));
+    return SLANG_FAIL;
+}
+
+/// Consume the current token only when it is of `type`.
+/// Returns true if a token was consumed, false (without advancing) otherwise.
+bool JSONLexer::advanceIf(JSONTokenType type)
+{
+    if (peekType() != type)
+    {
+        return false;
+    }
+
+    advance();
+    return true;
+}
+
+/// Returns the slice of the source view's content covered by `tok`:
+/// it starts at the offset of `tok.loc` within the view's range and is
+/// `tok.length` long.
+UnownedStringSlice JSONLexer::getLexeme(const JSONToken& tok) const
+{
+    const auto tokenOffset = m_sourceView->getRange().getOffset(tok.loc);
+    const auto lexemeStart = m_sourceView->getContent().begin() + tokenOffset;
+    return UnownedStringSlice(lexemeStart, tok.length);
+}
+
JSONTokenType JSONLexer::advance()
{
const char* cursor = m_cursor;
@@ -382,4 +423,27 @@ const char* JSONLexer::_lexWhitespace(const char* cursor)
}
}
+/// Maps a token type to the text used for it in diagnostics.
+/// Punctuation and keyword tokens map to their spelling, the others to a
+/// short description. An unknown type is reported via SLANG_UNEXPECTED.
+UnownedStringSlice getJSONTokenAsText(JSONTokenType type)
+{
+    switch (type)
+    {
+        // Punctuation
+        case JSONTokenType::LBrace:
+            return UnownedStringSlice::fromLiteral("{");
+        case JSONTokenType::RBrace:
+            return UnownedStringSlice::fromLiteral("}");
+        case JSONTokenType::LBracket:
+            return UnownedStringSlice::fromLiteral("[");
+        case JSONTokenType::RBracket:
+            return UnownedStringSlice::fromLiteral("]");
+        case JSONTokenType::Colon:
+            return UnownedStringSlice::fromLiteral(":");
+        case JSONTokenType::Comma:
+            return UnownedStringSlice::fromLiteral(",");
+
+        // Keywords
+        case JSONTokenType::Null:
+            return UnownedStringSlice::fromLiteral("null");
+        case JSONTokenType::True:
+            return UnownedStringSlice::fromLiteral("true");
+        case JSONTokenType::False:
+            return UnownedStringSlice::fromLiteral("false");
+
+        // Literals
+        case JSONTokenType::StringLiteral:
+            return UnownedStringSlice::fromLiteral("string literal");
+        case JSONTokenType::IntegerLiteral:
+            return UnownedStringSlice::fromLiteral("integer literal");
+        case JSONTokenType::FloatLiteral:
+            return UnownedStringSlice::fromLiteral("float literal");
+
+        // Special
+        case JSONTokenType::EndOfFile:
+            return UnownedStringSlice::fromLiteral("end of file");
+        case JSONTokenType::Invalid:
+            return UnownedStringSlice::fromLiteral("invalid");
+
+        default:
+            break;
+    }
+    SLANG_UNEXPECTED("JSONTokenType not known");
+}
+
} // namespace Slang
diff --git a/source/compiler-core/slang-json-lexer.h b/source/compiler-core/slang-json-lexer.h
index 03f16d445..ee4b60f75 100644
--- a/source/compiler-core/slang-json-lexer.h
+++ b/source/compiler-core/slang-json-lexer.h
@@ -35,14 +35,34 @@ struct JSONToken
uint32_t length; ///< The length of the token in bytes
};
+UnownedStringSlice getJSONTokenAsText(JSONTokenType type);
+
class JSONLexer
{
public:
+ /// Peek the current token
JSONToken& peekToken() { return m_token; }
+ /// Peek the current type
JSONTokenType peekType() { return m_token.type; }
-
+ /// Peek the current SourceLoc
+ SourceLoc peekLoc() { return m_token.loc; }
+
+ /// Get the lexeme of JSONToken
+ UnownedStringSlice getLexeme(const JSONToken& tok) const;
+ /// Peek the lexeme at the current position
+ UnownedStringSlice peekLexeme() const { return getLexeme(m_token); }
+
JSONTokenType advance();
+ /// Expects a token of type type. If found advances, if not returns an error and outputs to diagnostic sink
+ SlangResult expect(JSONTokenType type);
+ /// Same as expect except out will hold the token.
+ SlangResult expect(JSONTokenType type, JSONToken& out);
+
+ /// Returns true and advances if current token is type
+ bool advanceIf(JSONTokenType type);
+
+ /// Must be called before use
SlangResult init(SourceView* sourceView, DiagnosticSink* sink);
protected:
diff --git a/source/compiler-core/slang-json-parser.cpp b/source/compiler-core/slang-json-parser.cpp
new file mode 100644
index 000000000..478b02fb8
--- /dev/null
+++ b/source/compiler-core/slang-json-parser.cpp
@@ -0,0 +1,384 @@
+// slang-json-parser.cpp
+#include "slang-json-parser.h"
+
+#include "slang-json-diagnostics.h"
+
+/*
+https://www.json.org/json-en.html
+*/
+
+namespace Slang {
+
+/// Parses `{ "key" : value, ... }` starting at the opening brace.
+/// The listener receives startObject, then an addLexemeKey followed by the
+/// value's events for each member, then endObject. On a failure the function
+/// returns immediately, so endObject is not sent.
+SlangResult JSONParser::_parseObject()
+{
+    SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::LBrace));
+    m_listener->startObject();
+
+    // An immediately following '}' is the empty object; otherwise there is at least one member
+    if (!m_lexer->advanceIf(JSONTokenType::RBrace))
+    {
+        do
+        {
+            JSONToken key;
+            SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::StringLiteral, key));
+            m_listener->addLexemeKey(m_lexer->getLexeme(key));
+
+            SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::Colon));
+            SLANG_RETURN_ON_FAIL(_parseValue());
+        }
+        while (m_lexer->advanceIf(JSONTokenType::Comma));
+
+        SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::RBrace));
+    }
+
+    m_listener->endObject();
+    return SLANG_OK;
+}
+
+/// Parses `[ value, ... ]` starting at the opening bracket.
+/// The listener receives startArray, the events of every element, then
+/// endArray. On a failure the function returns immediately, so endArray is
+/// not sent.
+SlangResult JSONParser::_parseArray()
+{
+    SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::LBracket));
+    m_listener->startArray();
+
+    // An immediately following ']' is the empty array; otherwise there is at least one element
+    if (!m_lexer->advanceIf(JSONTokenType::RBracket))
+    {
+        do
+        {
+            SLANG_RETURN_ON_FAIL(_parseValue());
+        }
+        while (m_lexer->advanceIf(JSONTokenType::Comma));
+
+        SLANG_RETURN_ON_FAIL(m_lexer->expect(JSONTokenType::RBracket));
+    }
+
+    m_listener->endArray();
+    return SLANG_OK;
+}
+
+/// Parses one JSON value at the current token.
+/// Simple values are handed to the listener as lexemes; '[' and '{' recurse
+/// into _parseArray / _parseObject. Any other token fails: an Invalid token
+/// fails silently, everything else is diagnosed as unexpected.
+SlangResult JSONParser::_parseValue()
+{
+    const JSONTokenType type = m_lexer->peekType();
+
+    switch (type)
+    {
+        case JSONTokenType::LBrace:
+            return _parseObject();
+
+        case JSONTokenType::LBracket:
+            return _parseArray();
+
+        case JSONTokenType::Null:
+        case JSONTokenType::True:
+        case JSONTokenType::False:
+        case JSONTokenType::StringLiteral:
+        case JSONTokenType::IntegerLiteral:
+        case JSONTokenType::FloatLiteral:
+        {
+            // Hand over the lexeme before advancing replaces the current token
+            m_listener->addLexemeValue(type, m_lexer->peekLexeme());
+            m_lexer->advance();
+            return SLANG_OK;
+        }
+
+        case JSONTokenType::Invalid:
+        {
+            // It's a lex error, so just fail
+            return SLANG_FAIL;
+        }
+
+        default:
+        {
+            m_sink->diagnose(m_lexer->peekLoc(), JSONDiagnostics::unexpectedToken, getJSONTokenAsText(type));
+            return SLANG_FAIL;
+        }
+    }
+}
+
+/// Parses a complete JSON document: exactly one value followed by end of file.
+/// The passed pointers are stored in the parser for use by the _parse* helpers.
+/// Returns the first failure from parsing the value, otherwise the result of
+/// expecting the end of file.
+SlangResult JSONParser::parse(JSONLexer* lexer, SourceView* sourceView, JSONListener* listener, DiagnosticSink* sink)
+{
+    m_lexer = lexer;
+    m_sourceView = sourceView;
+    m_sink = sink;
+    m_listener = listener;
+
+    SLANG_RETURN_ON_FAIL(_parseValue());
+
+    // Nothing but the end of file may follow the top level value
+    const SlangResult endResult = m_lexer->expect(JSONTokenType::EndOfFile);
+    return endResult;
+}
+
+/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ JSONWriter
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
+
+/// Length of the current line excluding the characters of its emitted indent.
+/// Returns 0 when no indent has been emitted for the current line yet.
+Index JSONWriter::_getLineLengthAfterIndent()
+{
+    if (m_emittedIndent >= 0)
+    {
+        const Index indentChars = m_emittedIndent * m_indentCharCount;
+        const Index lineChars = m_builder.getLength() - m_lineStart;
+        return lineChars - indentChars;
+    }
+
+    return 0;
+}
+
+
+/// Appends the indent for the current indent level and records that level as
+/// the indent emitted for the current line.
+void JSONWriter::_emitIndent()
+{
+    const Index indentChars = m_currentIndent * m_indentCharCount;
+    m_builder.appendRepeatedChar(m_indentChar, indentChars);
+
+    m_emittedIndent = m_currentIndent;
+    SLANG_ASSERT(m_emittedIndent >= 0);
+}
+
+/// Emits the indent unless one has already been emitted for the current line.
+void JSONWriter::_maybeEmitIndent()
+{
+    const bool alreadyIndented = (m_emittedIndent >= 0);
+    if (alreadyIndented)
+    {
+        return;
+    }
+    _emitIndent();
+}
+
+/// Terminates the current line and resets the per-line bookkeeping.
+void JSONWriter::_nextLine()
+{
+    m_builder << "\n";
+
+    // The new line has no indent yet, and starts right after the newline just written
+    m_emittedIndent = -1;
+    ++m_lineIndex;
+    m_lineStart = m_builder.getLength();
+}
+
+/// Starts a new line unless the current one is still untouched.
+void JSONWriter::_maybeNextLine()
+{
+    // An indent is always emitted before anything else on a line, so a missing
+    // indent means nothing has been written to the current line.
+    if (m_emittedIndent >= 0)
+    {
+        _nextLine();
+    }
+}
+
+/// Applies the line break / indent level changes that the configured
+/// indentation style requires at `loc`.
+void JSONWriter::_handleFormat(Location loc)
+{
+    // Classification of loc. For comma locations before/open/close are all false.
+    const bool atComma = isComma(loc);
+    const bool atBefore = isBefore(loc);
+    const bool atOpen = isOpen(loc);
+    const bool atClose = isClose(loc);
+
+    switch (m_format)
+    {
+        case IndentationStyle::Allman:
+        {
+            // Every location is followed by a line break in this style
+            _maybeNextLine();
+
+            if (!atComma)
+            {
+                if (atBefore)
+                {
+                    // Closing bracket sits one level out
+                    if (atClose)
+                    {
+                        _dedent();
+                    }
+                }
+                else if (atOpen)
+                {
+                    // Contents after an opening bracket sit one level in
+                    _indent();
+                }
+            }
+            break;
+        }
+        case IndentationStyle::KNR:
+        {
+            if (atComma)
+            {
+                // Fields always go on their own line, other commas only break an over long line
+                const bool wantBreak =
+                    (loc == Location::FieldComma) ||
+                    (m_lineLengthLimit > 0 && _getLineLengthAfterIndent() > m_lineLengthLimit);
+                if (wantBreak)
+                {
+                    _maybeNextLine();
+                }
+            }
+            else if (atBefore)
+            {
+                // Opening brackets stay on the current line, closing ones get their own
+                if (atClose)
+                {
+                    _maybeNextLine();
+                    _dedent();
+                }
+            }
+            else
+            {
+                _maybeNextLine();
+                if (atOpen)
+                {
+                    _indent();
+                }
+            }
+            break;
+        }
+    }
+}
+
+/// Emits the ", " separator (and applies Comma formatting) when the current
+/// scope is flagged as already holding a previous value.
+void JSONWriter::_maybeEmitComma()
+{
+    const bool hasPrevious = (m_state.m_flags & State::Flag::HasPrevious) != 0;
+    if (!hasPrevious)
+    {
+        return;
+    }
+
+    _maybeEmitIndent();
+    m_builder << ", ";
+    _handleFormat(Location::Comma);
+}
+
+/// Emits the ", " separator (and applies FieldComma formatting) when the
+/// current scope is flagged as already holding a previous value.
+void JSONWriter::_maybeEmitFieldComma()
+{
+    const bool hasPrevious = (m_state.m_flags & State::Flag::HasPrevious) != 0;
+    if (!hasPrevious)
+    {
+        return;
+    }
+
+    _maybeEmitIndent();
+    m_builder << ", ";
+    _handleFormat(Location::FieldComma);
+}
+
+/// Opens a JSON object as the next value of the current scope.
+/// Emits a pending separator and the '{', then pushes the enclosing scope's
+/// state on the stack and makes a fresh Object state current.
+void JSONWriter::startObject()
+{
+    SLANG_ASSERT(m_state.canEmitValue());
+
+    _maybeEmitComma();
+
+    _handleFormat(Location::BeforeOpenObject);
+    _maybeEmitIndent();
+    m_builder << "{";
+    _handleFormat(Location::AfterOpenObject);
+
+    // The object is a value of the enclosing scope: once it is closed that scope
+    // has a previous value (so the next sibling gets a comma) and any pending key
+    // has been consumed. HasKey must be *cleared* here - masking with the flag
+    // itself would instead drop HasPrevious and keep HasKey set.
+    m_state.m_flags |= State::Flag::HasPrevious;
+    m_state.m_flags &= ~State::Flag::HasKey;
+
+    m_stack.add(m_state);
+
+    m_state.m_kind = State::Kind::Object;
+    m_state.m_flags = 0;
+}
+
+/// Closes the object opened by the matching startObject: emits the '}' and
+/// makes the state saved on the stack current again.
+void JSONWriter::endObject()
+{
+    SLANG_ASSERT(m_state.m_kind == State::Kind::Object);
+
+    _handleFormat(Location::BeforeCloseObject);
+    _maybeEmitIndent();
+    m_builder << "}";
+    _handleFormat(Location::AfterCloseObject);
+
+    // Pop the enclosing scope
+    const State enclosing = m_stack.getLast();
+    m_stack.removeLast();
+    m_state = enclosing;
+}
+
+/// Opens a JSON array as the next value of the current scope.
+/// Emits a pending separator and the '[', then pushes the enclosing scope's
+/// state on the stack and makes a fresh Array state current.
+void JSONWriter::startArray()
+{
+    SLANG_ASSERT(m_state.canEmitValue());
+
+    _maybeEmitComma();
+
+    _handleFormat(Location::BeforeOpenArray);
+    _maybeEmitIndent();
+    m_builder << "[";
+    _handleFormat(Location::AfterOpenArray);
+
+    // The array is a value of the enclosing scope: once it is closed that scope
+    // has a previous value (so the next sibling gets a comma) and any pending key
+    // has been consumed. HasKey must be *cleared* here - masking with the flag
+    // itself would instead drop HasPrevious and keep HasKey set.
+    m_state.m_flags |= State::Flag::HasPrevious;
+    m_state.m_flags &= ~State::Flag::HasKey;
+
+    m_stack.add(m_state);
+
+    m_state.m_kind = State::Kind::Array;
+    m_state.m_flags = 0;
+}
+
+/// Closes the array opened by the matching startArray: emits the ']' and
+/// makes the state saved on the stack current again.
+void JSONWriter::endArray()
+{
+    SLANG_ASSERT(m_state.m_kind == State::Kind::Array);
+
+    _handleFormat(Location::BeforeCloseArray);
+    _maybeEmitIndent();
+    m_builder << "]";
+    _handleFormat(Location::AfterCloseArray);
+
+    // Pop the enclosing scope
+    const State enclosing = m_stack.getLast();
+    m_stack.removeLast();
+    m_state = enclosing;
+}
+
+/// Emits an object key. `key` is the lexeme as it appears in the source, so
+/// it is expected to still carry its surrounding quotes and is written as is.
+/// Only valid inside an object that is not already waiting for a value.
+void JSONWriter::addLexemeKey(const UnownedStringSlice& key)
+{
+    SLANG_ASSERT(m_state.m_kind == State::Kind::Object && (m_state.m_flags & State::Flag::HasKey) == 0);
+
+    _maybeEmitFieldComma();
+
+    // It should be quoted
+    SLANG_ASSERT(key.getLength() >= 2 && key[0] == '"' && key[key.getLength() - 1] == '"');
+
+    _maybeEmitIndent();
+    m_builder << key;
+    m_builder << " : ";
+
+    // The value belonging to this key follows the ':' directly, so it must not
+    // be preceded by a ','
+    m_state.m_flags &= ~State::Flag::HasPrevious;
+    // A value is now required
+    m_state.m_flags |= State::Flag::HasKey;
+}
+
+/// Emits a simple value. Literal tokens (integer, float, string) are written
+/// using the lexeme in `value`; true/false/null are written from their fixed
+/// spelling. Any other token type asserts and writes no value text.
+void JSONWriter::addLexemeValue(JSONTokenType type, const UnownedStringSlice& value)
+{
+    SLANG_ASSERT(m_state.canEmitValue());
+
+    _maybeEmitComma();
+    _maybeEmitIndent();
+
+    if (type == JSONTokenType::IntegerLiteral ||
+        type == JSONTokenType::FloatLiteral ||
+        type == JSONTokenType::StringLiteral)
+    {
+        m_builder << value;
+    }
+    else if (type == JSONTokenType::True)
+    {
+        m_builder << UnownedStringSlice::fromLiteral("true");
+    }
+    else if (type == JSONTokenType::False)
+    {
+        m_builder << UnownedStringSlice::fromLiteral("false");
+    }
+    else if (type == JSONTokenType::Null)
+    {
+        m_builder << UnownedStringSlice::fromLiteral("null");
+    }
+    else
+    {
+        SLANG_ASSERT(!"Can only emit values");
+    }
+
+    // The key (if there was one) has been consumed by this value
+    m_state.m_flags &= ~State::Flag::HasKey;
+    // Whatever comes next in this scope needs a separator
+    m_state.m_flags |= State::Flag::HasPrevious;
+}
+
+} // namespace Slang
diff --git a/source/compiler-core/slang-json-parser.h b/source/compiler-core/slang-json-parser.h
new file mode 100644
index 000000000..2e907abb2
--- /dev/null
+++ b/source/compiler-core/slang-json-parser.h
@@ -0,0 +1,188 @@
+// slang-json-parser.h
+#ifndef SLANG_JSON_PARSER_H
+#define SLANG_JSON_PARSER_H
+
+#include "slang-json-lexer.h"
+
+
+namespace Slang {
+
+/// Receiver of the events produced while JSON is traversed (for example by
+/// JSONParser). Implementations decide what to do with the structure, such as
+/// writing it back out as text.
+class JSONListener
+{
+public:
+    /// Virtual so that an implementation can be safely destroyed through a JSONListener pointer
+    virtual ~JSONListener() {}
+
+    /// Start an object
+    virtual void startObject() = 0;
+    /// End an object
+    virtual void endObject() = 0;
+    /// Start an array
+    virtual void startArray() = 0;
+    /// End an array
+    virtual void endArray() = 0;
+
+    /// Add the key lexeme. Must be followed by addLexemeValue.
+    virtual void addLexemeKey(const UnownedStringSlice& key) = 0;
+    /// Can be performed in an array or after an addLexemeKey in an object
+    virtual void addLexemeValue(JSONTokenType type, const UnownedStringSlice& value) = 0;
+};
+
+/// A JSONListener that writes the events it receives as formatted JSON text
+/// into an internal StringBuilder (see getBuilder()).
+class JSONWriter : public JSONListener
+{
+public:
+ /*
+ https://en.wikipedia.org/wiki/Indentation_style
+ */
+ enum class IndentationStyle
+ {
+ Allman, ///< A line break at every location: commas, and before and after every opening and closing bracket
+ KNR, ///< K&R like. Opening brackets stay on the current line, fields are placed on separate lines.
+ };
+
+ /// Coarse classification of a Location (see getLocationType)
+ enum class LocationType : uint8_t
+ {
+ Object,
+ Array,
+ Comma,
+ };
+
+ // NOTE! The order must be kept as it is, unless the is* functions below are updated to match.
+ // For the object and array locations bit 0 distinguishes open (0) from close (1),
+ // and bit 1 distinguishes before (0) from after (1).
+ enum class Location
+ {
+ BeforeOpenObject,
+ BeforeCloseObject,
+ AfterOpenObject,
+ AfterCloseObject,
+
+ BeforeOpenArray,
+ BeforeCloseArray,
+ AfterOpenArray,
+ AfterCloseArray,
+
+ FieldComma,
+ Comma,
+
+ CountOf,
+ };
+
+ /// Get the LocationType of a location
+ static LocationType getLocationType(Location loc) { return isObject(loc) ? LocationType::Object : (isComma(loc) ? LocationType::Comma : LocationType::Array); }
+
+ /// True for every object or array location, i.e. anything that is not a comma
+ static bool isObjectLike(Location loc) { return Index(loc) <= Index(Location::AfterCloseArray); }
+ /// True for the four object locations
+ static bool isObject(Location loc) { return Index(loc) <= Index(Location::AfterCloseObject); }
+ /// True for the four array locations
+ static bool isArray(Location loc) { return Index(loc) >= Index(Location::BeforeOpenArray) && Index(loc) <= Index(Location::AfterCloseArray); }
+ /// True for FieldComma and everything ordered after it (which includes CountOf)
+ static bool isComma(Location loc) { return Index(loc) >= Index(Location::FieldComma); }
+ /// The following four are always false for comma locations
+ static bool isOpen(Location loc) { return isObjectLike(loc) && (Index(loc) & 1) == 0; }
+ static bool isClose(Location loc) { return isObjectLike(loc) && (Index(loc) & 1) != 0; }
+ static bool isBefore(Location loc) { return isObjectLike(loc) && (Index(loc) & 2) == 0; }
+ static bool isAfter(Location loc) { return isObjectLike(loc) && (Index(loc) & 2) != 0; }
+
+ // Implement JSONListener
+ virtual void startObject() SLANG_OVERRIDE;
+ virtual void endObject() SLANG_OVERRIDE;
+ virtual void startArray() SLANG_OVERRIDE;
+ virtual void endArray() SLANG_OVERRIDE;
+ virtual void addLexemeKey(const UnownedStringSlice& key) SLANG_OVERRIDE;
+ virtual void addLexemeValue(JSONTokenType type, const UnownedStringSlice& value) SLANG_OVERRIDE;
+
+ /// Get the builder
+ StringBuilder& getBuilder() { return m_builder; }
+
+ /// format selects the indentation style. lineLengthLimit is only consulted when it is
+ /// greater than 0; the default of -1 therefore disables it.
+ JSONWriter(IndentationStyle format, Index lineLengthLimit = -1)
+ {
+ m_format = format;
+ m_lineLengthLimit = lineLengthLimit;
+
+ m_state.m_kind = State::Kind::Root;
+ m_state.m_flags = 0;
+ }
+
+protected:
+ /// Bookkeeping for one nesting level. The state of each enclosing level is kept on m_stack.
+ struct State
+ {
+ enum class Kind : uint8_t
+ {
+ Root,
+ Object,
+ Array,
+ };
+
+ typedef uint8_t Flags;
+ struct Flag
+ {
+ enum Enum : Flags
+ {
+ HasPrevious = 0x01, ///< Something was already emitted at this level, so a comma is needed before the next item
+ HasKey = 0x02, ///< A key was emitted and its value has not been emitted yet
+ };
+ };
+
+ /// The root can hold a single value, an array any number, and an object only accepts
+ /// a value directly after a key.
+ bool canEmitValue() const
+ {
+ switch (m_kind)
+ {
+ case Kind::Root: return (m_flags & Flag::HasPrevious) == 0;
+ case Kind::Array: return true;
+ case Kind::Object: return (m_flags & Flag::HasKey) != 0;
+ default: return false;
+ }
+ }
+
+ Kind m_kind;
+ Flags m_flags;
+ };
+
+ /// Starts a new line, unless nothing has been emitted on the current one
+ void _maybeNextLine();
+ /// Unconditionally starts a new line
+ void _nextLine();
+ /// Applies the line breaks and indent changes the indentation style wants at loc
+ void _handleFormat(Location loc);
+
+ /// Length of the current line without its indent, or 0 if no indent has been emitted for it
+ Index _getLineLengthAfterIndent();
+
+ /// Only emits the indent if at start of line
+ void _maybeEmitIndent();
+ void _emitIndent();
+
+ /// Emit a separating comma if the current level already has a previous item
+ void _maybeEmitComma();
+ void _maybeEmitFieldComma();
+
+ void _indent() { m_currentIndent++; }
+ void _dedent() { --m_currentIndent; SLANG_ASSERT(m_currentIndent >= 0); }
+
+ /// True if the line is indented at the required level
+ bool _hasIndent() { return m_emittedIndent >= 0 && m_emittedIndent == m_currentIndent; }
+
+ Index m_currentIndent = 0; ///< The current indent level
+ char m_indentChar = ' '; ///< The character an indent is made of
+ Index m_indentCharCount = 4; ///< How many m_indentChar make up one indent level
+
+ Index m_lineIndex = 0; ///< Incremented for every line started
+ Index m_lineStart = 0; ///< The length of the builder at the point the current line started
+ Index m_emittedIndent = -1; /// If -1 for current line there is no indent emitted
+
+ Index m_lineLengthLimit = -1; /// The limit is only applied *AFTER* indentation
+
+ IndentationStyle m_format;
+
+ StringBuilder m_builder; ///< Receives the output text
+ List<State> m_stack; ///< The states of the enclosing levels
+ State m_state; ///< The state of the current level
+};
+
+/// Recursive descent parser that reads tokens from a JSONLexer and reports
+/// the structure it finds to a JSONListener.
+class JSONParser
+{
+public:
+    /// Parses exactly one JSON value followed by end of file.
+    /// Structure is reported to listener as it is parsed, unexpected tokens are
+    /// reported to sink. Returns SLANG_OK only if the whole input was valid.
+    SlangResult parse(JSONLexer* lexer, SourceView* sourceView, JSONListener* listener, DiagnosticSink* sink);
+
+protected:
+    SlangResult _parseValue();
+    SlangResult _parseObject();
+    SlangResult _parseArray();
+
+    // All of these are set by parse. They are initialized so that a parser which
+    // has not been used yet holds null rather than indeterminate pointers.
+    SourceView* m_sourceView = nullptr;
+    DiagnosticSink* m_sink = nullptr;
+    JSONListener* m_listener = nullptr;
+    JSONLexer* m_lexer = nullptr;
+};
+
+
+
+} // namespace Slang
+
+#endif