summaryrefslogtreecommitdiffstats
path: root/source/slang/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang/lexer.cpp')
-rw-r--r--source/slang/lexer.cpp81
1 files changed, 53 insertions, 28 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 84b34c9a9..fe01878d1 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -1,4 +1,12 @@
-#include "Lexer.h"
+// lexer.cpp
+#include "lexer.h"
+
+// This file implements the lexer/scanner, which is responsible for taking a raw stream of
+// input bytes and turning it into semantically useful tokens.
+//
+
+#include "compiler.h"
+#include "source-loc.h"
#include <assert.h>
@@ -6,7 +14,7 @@ namespace Slang
{
static Token GetEndOfFileToken()
{
- return Token(TokenType::EndOfFile, "", 0, 0, 0, "");
+ return Token(TokenType::EndOfFile, "", SourceLoc());
}
Token* TokenList::begin() const
@@ -52,10 +60,10 @@ namespace Slang
return mCursor->type;
}
- CodePosition TokenReader::PeekLoc() const
+ SourceLoc TokenReader::PeekLoc() const
{
if (!mCursor)
- return CodePosition();
+ return SourceLoc();
SLANG_ASSERT(mCursor);
return mCursor->Position;
}
@@ -75,18 +83,22 @@ namespace Slang
// Lexer
- Lexer::Lexer(
- String const& path,
- String const& content,
- DiagnosticSink* sink)
- : path(path)
- , content(content)
- , sink(sink)
+ void Lexer::initialize(
+ SourceFile* inSourceFile,
+ DiagnosticSink* inSink)
{
+ sourceFile = inSourceFile;
+ sink = inSink;
+
+ auto content = inSourceFile->content;
+
+ begin = content.begin();
cursor = content.begin();
end = content.end();
- loc = CodePosition(1, 1, 0, path);
+ spellingStartLoc = inSourceFile->sourceRange.begin;
+ presumedStartLoc = spellingStartLoc;
+
tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
lexerFlags = 0;
}
@@ -142,9 +154,6 @@ namespace Slang
{
advanceRaw(lexer);
}
-
- lexer->loc.Line++;
- lexer->loc.Col = 1;
}
// Look ahead one code point, dealing with complications like
@@ -224,12 +233,7 @@ namespace Slang
// TODO: Need to handle non-ASCII code points.
- // Default case is to advance by one location
- // and return the raw byte we saw.
-
- lexer->loc.Col++;
- lexer->loc.Pos++;
-
+ // Default case is to return the raw byte we saw.
return c;
}
}
@@ -323,6 +327,27 @@ namespace Slang
}
}
+ static SourceLoc getSourceLoc(Lexer* lexer)
+ {
+ return lexer->presumedStartLoc + (lexer->cursor - lexer->begin);
+ }
+
+ // Begin overriding the reported locations of tokens,
+ // based on a `#line` directives
+ void Lexer::startOverridingSourceLocations(
+ SourceLoc loc)
+ {
+ if(loc.isValid())
+ {
+ presumedStartLoc = loc;
+ }
+ }
+
+ void Lexer::stopOverridingSourceLocations()
+ {
+ presumedStartLoc = spellingStartLoc;
+ }
+
static void lexDigits(Lexer* lexer, int base)
{
for(;;)
@@ -355,7 +380,7 @@ namespace Slang
if(digitVal >= base)
{
char buffer[] = { (char) c, 0 };
- lexer->sink->diagnose(lexer->loc, Diagnostics::invalidDigitForBase, buffer, base);
+ lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::invalidDigitForBase, buffer, base);
}
advance(lexer);
@@ -695,11 +720,11 @@ namespace Slang
switch(c)
{
case kEOF:
- lexer->sink->diagnose(lexer->loc, Diagnostics::endOfFileInLiteral);
+ lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::endOfFileInLiteral);
return;
case '\n': case '\r':
- lexer->sink->diagnose(lexer->loc, Diagnostics::newlineInLiteral);
+ lexer->sink->diagnose(getSourceLoc(lexer), Diagnostics::newlineInLiteral);
return;
case '\\':
@@ -952,7 +977,7 @@ namespace Slang
case '0':
{
- auto loc = lexer->loc;
+ auto loc = getSourceLoc(lexer);
advance(lexer);
switch(peek(lexer))
{
@@ -1170,7 +1195,7 @@ namespace Slang
// If none of the above cases matched, then we have an
// unexpected/invalid character.
- auto loc = lexer->loc;
+ auto loc = getSourceLoc(lexer);
auto sink = lexer->sink;
int c = advance(lexer);
if(c >= 0x20 && c <= 0x7E)
@@ -1194,7 +1219,7 @@ namespace Slang
for(;;)
{
Token token;
- token.Position = loc;
+ token.Position = getSourceLoc(this);
char const* textBegin = cursor;
@@ -1314,7 +1339,7 @@ namespace Slang
TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink)
{
TokenList tokenList;
- tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, CodePosition pos)
+ tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, SourceLoc pos)
{
auto curChar = str[pos.Pos];
switch (errType)