Add an explicit `Name` type

Fixes #23. Up to this point, the compiler has used the ordinary `String` type to represent declaration names, which means a bunch of lookup structures throughout the compiler were string-to-whatever maps, which can reduce efficiency. It also means that things like the `Token` type end up carrying a `String` by value and paying for things like reference-counting. This change adds a `Name` type that is used to represent names of variables, types, macros, etc. Names are cached and unique'd globally for a session, and the string-to-name mapping gets done during lexing. From that point on, most mapping is from pointers, which should make all the various table lookups faster. More importantly (possibly), this brings us one step closer to being able to pool-allocate the AST nodes.
author: Tim Foley <tfoley@nvidia.com> 2017-08-14 14:48:37 -0700
committer: Tim Foley <tfoley@nvidia.com> 2017-08-14 14:48:37 -0700
commit: 9885c972a6bfa6f856e505cdd90d9b71fdbdadaf (patch)
tree: 7314b26e21ded966b6a4fe2430f0421c0c0970bd /source/slang/lexer.cpp
parent: 7f57ea4ad86c2a3eb5a14fef458e711845c1f87e (diff)
1 files changed, 16 insertions, 45 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 351e3f664..11c70d1f5 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -65,7 +65,7 @@ namespace Slang
         if (!mCursor)
             return SourceLoc();
         SLANG_ASSERT(mCursor);
-        return mCursor->Position;
+        return mCursor->loc;
     }
 
     Token TokenReader::AdvanceToken()
@@ -85,10 +85,12 @@ namespace Slang
 
     void Lexer::initialize(
         SourceFile*     inSourceFile,
-        DiagnosticSink* inSink)
+        DiagnosticSink* inSink,
+        NamePool*       inNamePool)
     {
-        sourceFile = inSourceFile;
-        sink = inSink;
+        sourceFile  = inSourceFile;
+        sink        = inSink;
+        namePool    = inNamePool;
 
         auto content = inSourceFile->content;
 
@@ -222,6 +224,8 @@ namespace Slang
                     lexer->cursor++;
                     handleNewLineInner(lexer, d);
 
+                    lexer->tokenFlags |= TokenFlag::ScrubbingNeeded;
+
                     // Now try again, looking at the character after the
                     // escaped nmewline.
                     continue;
@@ -1215,11 +1219,11 @@ namespace Slang
 
     Token Lexer::lexToken()
     {
-        auto flags = this->tokenFlags;
+        auto& flags = this->tokenFlags;
         for(;;)
         {
             Token token;
-            token.Position = getSourceLoc(this);
+            token.loc = getSourceLoc(this);
 
             char const* textBegin = cursor;
 
@@ -1246,7 +1250,7 @@ namespace Slang
             // We don't want to skip the end-of-file token, but we *do*
             // want to make sure it has appropriate flags to make our life easier
             case TokenType::EndOfFile:
-                flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
+                flags |= TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace;
                 break;
 
             // We will also do some book-keeping around preprocessor directives here:
@@ -1316,6 +1320,11 @@ namespace Slang
 
             this->tokenFlags = 0;
 
+            if (tokenType == TokenType::Identifier)
+            {
+                token.ptrValue = this->namePool->getName(token.Content);
+            }
+
             return token;
         }
     }
@@ -1332,42 +1341,4 @@ namespace Slang
                 return tokenList;
         }
     }
-
-
-
-#if 0
-    TokenList Lexer::Parse(const String & fileName, const String & str, DiagnosticSink * sink)
-    {
-        TokenList tokenList;
-        tokenList.mTokens = TokenizeText(fileName, str, [&](TokenizeErrorType errType, SourceLoc pos)
-        {
-            auto curChar = str[pos.Pos];
-            switch (errType)
-            {
-            case TokenizeErrorType::InvalidCharacter:
-                // Check if inside the ASCII "printable" range
-                if(curChar >= 0x20 && curChar <=  0x7E)
-                {
-                    char buffer[] = { curChar, 0 };
-                    sink->diagnose(pos, Diagnostics::illegalCharacterPrint, buffer);
-                }
-                else
-                {
-                    // Fallback: print as hexadecimal
-                    sink->diagnose(pos, Diagnostics::illegalCharacterHex, String((unsigned char)curChar, 16));
-                }
-                break;
-            case TokenizeErrorType::InvalidEscapeSequence:
-                sink->diagnose(pos, Diagnostics::illegalCharacterLiteral);
-                break;
-            default:
-                break;
-            }
-        });
-
-        // Add an end-of-file token so that we can reference it in diagnostic messages
-        tokenList.mTokens.Add(Token(TokenType::EndOfFile, "", 0, 0, 0, fileName, TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace));
-        return tokenList;
-    }
-#endif
 }
 \ No newline at end of file
author	Tim Foley <tfoley@nvidia.com>	2017-08-14 14:48:37 -0700
committer	Tim Foley <tfoley@nvidia.com>	2017-08-14 14:48:37 -0700
commit	9885c972a6bfa6f856e505cdd90d9b71fdbdadaf (patch)
tree	7314b26e21ded966b6a4fe2430f0421c0c0970bd /source/slang/lexer.cpp
parent	7f57ea4ad86c2a3eb5a14fef458e711845c1f87e (diff)