diff options
| author | Tim Foley <tfoley@nvidia.com> | 2017-08-14 14:48:37 -0700 |
|---|---|---|
| committer | Tim Foley <tfoley@nvidia.com> | 2017-08-14 14:48:37 -0700 |
| commit | 9885c972a6bfa6f856e505cdd90d9b71fdbdadaf (patch) | |
| tree | 7314b26e21ded966b6a4fe2430f0421c0c0970bd /source/slang/lexer.cpp | |
| parent | 7f57ea4ad86c2a3eb5a14fef458e711845c1f87e (diff) | |
Add an explicit `Name` type
Fixes #23
Up to this point, the compiler has used the ordinary `String` type to represent declaration names, which means a bunch of lookup structures throughout the compiler were string-to-whatever maps, which can reduce efficiency.
It also means that things like the `Token` type end up carrying a `String` by value and paying for things like reference-counting.
This change adds a `Name` type that is used to represent names of variables, types, macros, etc.
Names are cached and unique'd globally for a session, and the string-to-name mapping gets done during lexing.
From that point on, most mapping is from pointers, which should make all the various table lookups faster.
More importantly (possibly), this brings us one step closer to being able to pool-allocate the AST nodes.
Diffstat (limited to 'source/slang/lexer.cpp')
| -rw-r--r-- | source/slang/lexer.cpp | 61 |
1 files changed, 16 insertions, 45 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 351e3f664..11c70d1f5 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -65,7 +65,7 @@ namespace Slang if (!mCursor) return SourceLoc(); SLANG_ASSERT(mCursor); - return mCursor->Position; + return mCursor->loc; } Token TokenReader::AdvanceToken() @@ -85,10 +85,12 @@ namespace Slang void Lexer::initialize( SourceFile* inSourceFile, - DiagnosticSink* inSink) + DiagnosticSink* inSink, + NamePool* inNamePool) { - sourceFile = inSourceFile; - sink = inSink; + sourceFile = inSourceFile; + sink = inSink; + namePool = inNamePool; auto content = inSourceFile->content; @@ -222,6 +224,8 @@ namespace Slang lexer->cursor++; handleNewLineInner(lexer, d); + lexer->tokenFlags |= TokenFlag::ScrubbingNeeded; + // Now try again, looking at the character after the // escaped nmewline. continue; @@ -1215,11 +1219,11 @@ namespace Slang Token Lexer::lexToken() { - auto flags = this->tokenFlags; + auto& flags = this->tokenFlags; for(;;) { Token token; - token.Position = getSourceLoc(this); + token.loc = getSourceLoc(this); char const* textBegin = cursor; @@ -1246,7 +1250,7 @@ namespace Slang // We don't want to skip the end-of-file token, but we *do* // want to make sure it has appropriate flags to make our life easier case TokenType::EndOfFile: - flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; + flags |= TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; break; // We will also do some book-keeping around preprocessor directives here: @@ -1316,6 +1320,11 @@ namespace Slang this->tokenFlags = 0; + if (tokenType == TokenType::Identifier) + { + token.ptrValue = this->namePool->getName(token.Content); + } + return token; } } @@ -1332,42 +1341,4 @@ namespace Slang return tokenList; } } - - - -#if 0 - TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink) - { - TokenList tokenList; - tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType 
errType, SourceLoc pos) - { - auto curChar = str[pos.Pos]; - switch (errType) - { - case TokenizeErrorType::InvalidCharacter: - // Check if inside the ASCII "printable" range - if(curChar >= 0x20 && curChar <= 0x7E) - { - char buffer[] = { curChar, 0 }; - sink->diagnose(pos, Diagnostics::illegalCharacterPrint, buffer); - } - else - { - // Fallback: print as hexadecimal - sink->diagnose(pos, Diagnostics::illegalCharacterHex, String((unsigned char)curChar, 16)); - } - break; - case TokenizeErrorType::InvalidEscapeSequence: - sink->diagnose(pos, Diagnostics::illegalCharacterLiteral); - break; - default: - break; - } - }); - - // Add an end-of-file token so that we can reference it in diagnostic messages - tokenList.mTokens.Add(Token(TokenType::EndOfFile, "", 0, 0, 0, fileName, TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace)); - return tokenList; - } -#endif }
\ No newline at end of file |
