summaryrefslogtreecommitdiffstats
path: root/source/slang/lexer.cpp
diff options
context:
space:
mode:
authorTim Foley <tfoley@nvidia.com>2017-08-14 14:48:37 -0700
committerTim Foley <tfoley@nvidia.com>2017-08-14 14:48:37 -0700
commit9885c972a6bfa6f856e505cdd90d9b71fdbdadaf (patch)
tree7314b26e21ded966b6a4fe2430f0421c0c0970bd /source/slang/lexer.cpp
parent7f57ea4ad86c2a3eb5a14fef458e711845c1f87e (diff)
Add an explicit `Name` type
Fixes #23. Up to this point, the compiler has used the ordinary `String` type to represent declaration names, which means a bunch of lookup structures throughout the compiler were string-to-whatever maps, which can reduce efficiency. It also means that things like the `Token` type end up carrying a `String` by value and paying for things like reference-counting. This change adds a `Name` type that is used to represent names of variables, types, macros, etc. Names are cached and uniqued globally for a session, and the string-to-name mapping gets done during lexing. From that point on, most mapping is from pointers, which should make all the various table lookups faster. More importantly (possibly), this brings us one step closer to being able to pool-allocate the AST nodes.
Diffstat (limited to 'source/slang/lexer.cpp')
-rw-r--r--source/slang/lexer.cpp61
1 files changed, 16 insertions, 45 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 351e3f664..11c70d1f5 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -65,7 +65,7 @@ namespace Slang
if (!mCursor)
return SourceLoc();
SLANG_ASSERT(mCursor);
- return mCursor->Position;
+ return mCursor->loc;
}
Token TokenReader::AdvanceToken()
@@ -85,10 +85,12 @@ namespace Slang
void Lexer::initialize(
SourceFile* inSourceFile,
- DiagnosticSink* inSink)
+ DiagnosticSink* inSink,
+ NamePool* inNamePool)
{
- sourceFile = inSourceFile;
- sink = inSink;
+ sourceFile = inSourceFile;
+ sink = inSink;
+ namePool = inNamePool;
auto content = inSourceFile->content;
@@ -222,6 +224,8 @@ namespace Slang
lexer->cursor++;
handleNewLineInner(lexer, d);
+ lexer->tokenFlags |= TokenFlag::ScrubbingNeeded;
+
// Now try again, looking at the character after the
// escaped newline.
continue;
@@ -1215,11 +1219,11 @@ namespace Slang
Token Lexer::lexToken()
{
- auto flags = this->tokenFlags;
+ auto& flags = this->tokenFlags;
for(;;)
{
Token token;
- token.Position = getSourceLoc(this);
+ token.loc = getSourceLoc(this);
char const* textBegin = cursor;
@@ -1246,7 +1250,7 @@ namespace Slang
// We don't want to skip the end-of-file token, but we *do*
// want to make sure it has appropriate flags to make our life easier
case TokenType::EndOfFile:
- flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+ flags |= TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
break;
// We will also do some book-keeping around preprocessor directives here:
@@ -1316,6 +1320,11 @@ namespace Slang
this->tokenFlags = 0;
+ if (tokenType == TokenType::Identifier)
+ {
+ token.ptrValue = this->namePool->getName(token.Content);
+ }
+
return token;
}
}
@@ -1332,42 +1341,4 @@ namespace Slang
return tokenList;
}
}
-
-
-
-#if 0
- TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink)
- {
- TokenList tokenList;
- tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, SourceLoc pos)
- {
- auto curChar = str[pos.Pos];
- switch (errType)
- {
- case TokenizeErrorType::InvalidCharacter:
- // Check if inside the ASCII "printable" range
- if(curChar >= 0x20 && curChar <= 0x7E)
- {
- char buffer[] = { curChar, 0 };
- sink->diagnose(pos, Diagnostics::illegalCharacterPrint, buffer);
- }
- else
- {
- // Fallback: print as hexadecimal
- sink->diagnose(pos, Diagnostics::illegalCharacterHex, String((unsigned char)curChar, 16));
- }
- break;
- case TokenizeErrorType::InvalidEscapeSequence:
- sink->diagnose(pos, Diagnostics::illegalCharacterLiteral);
- break;
- default:
- break;
- }
- });
-
- // Add an end-of-file token so that we can reference it in diagnostic messages
- tokenList.mTokens.Add(Token(TokenType::EndOfFile, "", 0, 0, 0, fileName, TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace));
- return tokenList;
- }
-#endif
} \ No newline at end of file