Feature/lex memory reduction (#762)

* Only do scrubbing if needed. When allocating content, try to limit size (with scrubbing each token takes up 1k); now it's 16 bytes min size.
* Don't allocate for every call to write on the CallbackWriter - use the m_appendBuffer.
* Don't allocate memory for CallbackWriter - use m_appendBuffer.
* Use UnownedStringSlice for suffix output for parsing float/int literals. Fix typo in invalidFloatingPointLiteralSuffix.
* Using memory arena to hold tokens that are not in SourceManager.
* Improve comment on lexing.
* Make UnownedStringSlice allocation simpler on SourceManager.
* Fix error on gcc around UnownedStringSlice - because VC converted string + UnownedStringSlice automatically into a String.
* Fix generateName needing concat string for gcc.
* When constructing a Token in parseAttributeName - because it's an Identifier, we have to set the Name.
* Remove translation through String on getIntrinsicOp.
* Make func-cbuffer-param disablable with -exclude compatibility-issue.
* Move memory leak in render-test.
* From review - can just use "?:" instead of performing a concat.
author: jsmall-nvidia <jsmall@nvidia.com> 2018-12-20 13:23:58 -0500
committer: GitHub <noreply@github.com> 2018-12-20 13:23:58 -0500
commit: 02e44bade6370309c0292e84178095c2bae299be (patch)
tree: 9eca881afbd33c665cdb3616cb3f50994efee436 /source/slang/lexer.cpp
parent: 332056a947ec3d9e3588a60d449d64577a6f18c0 (diff)
1 files changed, 36 insertions, 25 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 918668296..3d87dd7ea 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -14,7 +14,7 @@ namespace Slang
 {
     static Token GetEndOfFileToken()
     {
-        return Token(TokenType::EndOfFile, "", SourceLoc());
+        return Token(TokenType::EndOfFile, UnownedStringSlice::fromLiteral(""), SourceLoc());
     }
 
     Token* TokenList::begin() const
@@ -86,11 +86,13 @@ namespace Slang
     void Lexer::initialize(
         SourceView*     inSourceView,
         DiagnosticSink* inSink,
-        NamePool*       inNamePool)
+        NamePool*       inNamePool,
+        MemoryArena*    inMemoryArena)
     {
         sourceView  = inSourceView;
         sink        = inSink;
         namePool    = inNamePool;
+        memoryArena = inMemoryArena;
 
         auto content = inSourceView->getContent();
         
@@ -548,7 +550,7 @@ namespace Slang
 
 
 
-    IntegerLiteralValue getIntegerLiteralValue(Token const& token, String* outSuffix)
+    IntegerLiteralValue getIntegerLiteralValue(Token const& token, UnownedStringSlice* outSuffix)
     {
         IntegerLiteralValue value = 0;
 
@@ -568,13 +570,13 @@ namespace Slang
 
         if(outSuffix)
         {
-            *outSuffix = String(cursor, end);
+            *outSuffix = UnownedStringSlice(cursor, end);
         }
 
         return value;
     }
 
-    FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, String* outSuffix)
+    FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, UnownedStringSlice* outSuffix)
     {
         FloatingPointLiteralValue value = 0;
 
@@ -661,7 +663,7 @@ namespace Slang
 
         if(outSuffix)
         {
-            *outSuffix = String(cursor, end);
+            *outSuffix = UnownedStringSlice(cursor, end);
         }
 
         return value;
@@ -784,7 +786,7 @@ namespace Slang
                 return valueBuilder.ProduceString();
             }
 
-            // Charcters that don't being escape sequences are easy;
+            // Characters that don't begin escape sequences are easy;
             // just append them to the buffer and move on.
             if(c != '\\')
             {
@@ -888,11 +890,11 @@ namespace Slang
     {
         // A file name usually doesn't process escape sequences
         // (this is import on Windows, where `\\` is a valid
-        // path separator cahracter).
+        // path separator character).
 
         // Just trim off the first and last characters to remove the quotes
         // (whether they were `""` or `<>`.
-        return token.Content.SubString(1, token.Content.Length()-2);
+        return String(token.Content.begin() + 1, token.Content.end() - 1); 
     }
 
 
@@ -1268,40 +1270,49 @@ namespace Slang
             // Note(tfoley): `StringBuilder::Append()` seems to crash when appending zero bytes
             if(textEnd != textBegin)
             {
-                // HACK(tfoley): "scrubbing" token value here to remove escaped newlines...
+                // "scrubbing" token value here to remove escaped newlines...
                 //
-                // TODO: Only perform this work if we encountered an escaped newline
+                // Only perform this work if we encountered an escaped newline
                 // while lexing this token (e.g., keep a flag on the lexer), or
                 // do it on-demand when the actual value of the token is needed.
-
-                StringBuilder valueBuilder;
-                auto tt = textBegin;
-                while(tt != textEnd)
+                if (tokenFlags & TokenFlag::ScrubbingNeeded)
                 {
-                    char c = *tt++;
-                    if(c == '\\')
+                    // Allocate space that will always be more than enough for stripped contents
+                    char* startDst = (char*)memoryArena->allocateUnaligned(textEnd - textBegin);
+                    char* dst = startDst;
+
+                    auto tt = textBegin;
+                    while (tt != textEnd)
                     {
-                        char d = *tt;
-                        switch(d)
+                        char c = *tt++;
+                        if (c == '\\')
                         {
-                        case '\r': case '\n':
+                            char d = *tt;
+                            switch (d)
+                            {
+                            case '\r': case '\n':
                             {
                                 tt++;
                                 char e = *tt;
-                                if((d ^ e) == ('\r' ^ '\n'))
+                                if ((d ^ e) == ('\r' ^ '\n'))
                                 {
                                     tt++;
                                 }
                             }
                             continue;
 
-                        default:
-                            break;
+                            default:
+                                break;
+                            }
                         }
+                        *dst++ = c;
                     }
-                    valueBuilder.Append(c);
+                    token.Content = UnownedStringSlice(startDst, dst);
+                }
+                else
+                {
+                    token.Content = UnownedStringSlice(textBegin, textEnd);
                 }
-                token.Content = valueBuilder.ProduceString();
             }
 
             token.flags = flags;
author	jsmall-nvidia <jsmall@nvidia.com>	2018-12-20 13:23:58 -0500
committer	GitHub <noreply@github.com>	2018-12-20 13:23:58 -0500
commit	02e44bade6370309c0292e84178095c2bae299be (patch)
tree	9eca881afbd33c665cdb3616cb3f50994efee436 /source/slang/lexer.cpp
parent	332056a947ec3d9e3588a60d449d64577a6f18c0 (diff)