diff options
Diffstat (limited to 'source/slang/lexer.cpp')
| -rw-r--r-- | source/slang/lexer.cpp | 81 |
1 files changed, 53 insertions, 28 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 84b34c9a9..fe01878d1 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -1,4 +1,12 @@ -#include "Lexer.h" +// lexer.cpp +#include "lexer.h" + +// This file implements the lexer/scanner, which is responsible for taking a raw stream of +// input bytes and turning it into semantically useful tokens. +// + +#include "compiler.h" +#include "source-loc.h" #include <assert.h> @@ -6,7 +14,7 @@ namespace Slang { static Token GetEndOfFileToken() { - return Token(TokenType::EndOfFile, "", 0, 0, 0, ""); + return Token(TokenType::EndOfFile, "", SourceLoc()); } Token* TokenList::begin() const @@ -52,10 +60,10 @@ namespace Slang return mCursor->type; } - CodePosition TokenReader::PeekLoc() const + SourceLoc TokenReader::PeekLoc() const { if (!mCursor) - return CodePosition(); + return SourceLoc(); SLANG_ASSERT(mCursor); return mCursor->Position; } @@ -75,18 +83,22 @@ namespace Slang // Lexer - Lexer::Lexer( - String const& path, - String const& content, - DiagnosticSink* sink) - : path(path) - , content(content) - , sink(sink) + void Lexer::initialize( + SourceFile* inSourceFile, + DiagnosticSink* inSink) { + sourceFile = inSourceFile; + sink = inSink; + + auto content = inSourceFile->content; + + begin = content.begin(); cursor = content.begin(); end = content.end(); - loc = CodePosition(1, 1, 0, path); + spellingStartLoc = inSourceFile->sourceRange.begin; + presumedStartLoc = spellingStartLoc; + tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; lexerFlags = 0; } @@ -142,9 +154,6 @@ namespace Slang { advanceRaw(lexer); } - - lexer->loc.Line++; - lexer->loc.Col = 1; } // Look ahead one code point, dealing with complications like @@ -224,12 +233,7 @@ namespace Slang // TODO: Need to handle non-ASCII code points. - // Default case is to advance by one location - // and return the raw byte we saw. - - lexer->loc.Col++; - lexer->loc.Pos++; - + // Default case is to return the raw byte we saw. return c; } } @@ -323,6 +327,27 @@ namespace Slang } } + static SourceLoc getSourceLoc(Lexer* lexer) + { + return lexer->presumedStartLoc + (lexer->cursor - lexer->begin); + } + + // Begin overriding the reported locations of tokens, + // based on a `#line` directives + void Lexer::startOverridingSourceLocations( + SourceLoc loc) + { + if(loc.isValid()) + { + presumedStartLoc = loc; + } + } + + void Lexer::stopOverridingSourceLocations() + { + presumedStartLoc = spellingStartLoc; + } + static void lexDigits(Lexer* lexer, int base) { for(;;) @@ -355,7 +380,7 @@ namespace Slang if(digitVal >= base) { char buffer[] = { (char) c, 0 }; - lexer->sink->diagnose(lexer->loc, Diagnostics::invalidDigitForBase, buffer, base); + lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::invalidDigitForBase, buffer, base); } advance(lexer); @@ -695,11 +720,11 @@ namespace Slang switch(c) { case kEOF: - lexer->sink->diagnose(lexer->loc, Diagnostics::endOfFileInLiteral); + lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::endOfFileInLiteral); return; case '\n': case '\r': - lexer->sink->diagnose(lexer->loc, Diagnostics::newlineInLiteral); + lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::newlineInLiteral); return; case '\\': @@ -952,7 +977,7 @@ namespace Slang case '0': { - auto loc = lexer->loc; + auto loc = getSourceLoc(lexer); advance(lexer); switch(peek(lexer)) { @@ -1170,7 +1195,7 @@ namespace Slang // If none of the above cases matched, then we have an // unexpected/invalid character. - auto loc = lexer->loc; + auto loc = getSourceLoc(lexer); auto sink = lexer->sink; int c = advance(lexer); if(c >= 0x20 && c <= 0x7E) @@ -1194,7 +1219,7 @@ namespace Slang for(;;) { Token token; - token.Position = loc; + token.Position = getSourceLoc(this); char const* textBegin = cursor; @@ -1314,7 +1339,7 @@ namespace Slang TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink) { TokenList tokenList; - tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, CodePosition pos) + tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, SourceLoc pos) { auto curChar = str[pos.Pos]; switch (errType) |
