Preprocessor cleanups (#484)

* For a `#error` or `#warning`, read the rest of the line as raw text to include in the error message
* When skipping tokens (e.g., in an `#ifdef`d out block), don't emit errors on invalid characters
* TODO: we could clearly get more efficient and skip whole raw lines in the future
* Fix an issue when a macro invocation that expands to nothing (zero tokens) is the last thing before a directive. The preprocessor was returning the `#` as an ordinary token, because it had already gone past its test for directives.
author: Tim Foley <tfoleyNV@users.noreply.github.com> 2018-04-12 17:08:52 -0700
committer: GitHub <noreply@github.com> 2018-04-12 17:08:52 -0700
commit: 021a4923f429278b1d7434e01cbf83edcdf43da4 (patch)
tree: 7c3c5534de8a98c28554d36c41e3e3c1e1b48aee /source
parent: baf194e7456ba4568dcf11249896af35b3ce18cc (diff)
4 files changed, 107 insertions, 46 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp
index 3f8709a9d..87ad6bd5c 100644
--- a/source/slang/lexer.cpp
+++ b/source/slang/lexer.cpp
@@ -273,7 +273,7 @@ namespace Slang
             case kEOF:
                 // TODO(tfoley) diagnostic!
                 return;
-                    
+
             case '\n': case '\r':
                 handleNewLine(lexer);
                 continue;
@@ -490,7 +490,7 @@ namespace Slang
     {
         lexDigits(lexer, base);
         maybeLexNumberExponent(lexer, base);
-            
+
         return maybeLexNumberSuffix(lexer, TokenType::FloatingPointLiteral);
     }
 
@@ -940,20 +940,38 @@ namespace Slang
 
 
 
-    static TokenType lexTokenImpl(Lexer* lexer)
+    static TokenType lexTokenImpl(Lexer* lexer, LexerFlags effectiveFlags)
     {
+        if(effectiveFlags & kLexerFlag_ExpectDirectiveMessage)
+        {
+            for(;;)
+            {
+                switch(peek(lexer))
+                {
+                default:
+                    advance(lexer);
+                    continue;
+
+                case kEOF: case '\r': case '\n':
+                    break;
+                }
+                break;
+            }
+            return TokenType::DirectiveMessage;
+        }
+
         switch(peek(lexer))
         {
         default:
             break;
 
         case kEOF:
-            if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
+            if((effectiveFlags & kLexerFlag_InDirective) != 0)
                 return TokenType::EndOfDirective;
             return TokenType::EndOfFile;
 
         case '\r': case '\n':
-            if((lexer->lexerFlags & kLexerFlag_InDirective) != 0)
+            if((effectiveFlags & kLexerFlag_InDirective) != 0)
                 return TokenType::EndOfDirective;
             handleNewLine(lexer);
             return TokenType::NewLine;
@@ -1201,24 +1219,27 @@ namespace Slang
             // unexpected/invalid character.
 
             auto loc = getSourceLoc(lexer);
-            auto sink = lexer->sink;
             int c = advance(lexer);
-            if(c >= 0x20 && c <=  0x7E)
-            {
-                char buffer[] = { (char) c, 0 };
-                sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer);
-            }
-            else
+            if(!(effectiveFlags & kLexerFlag_IgnoreInvalid))
             {
-                // Fallback: print as hexadecimal
-                sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16));
+                auto sink = lexer->sink;
+                if(c >= 0x20 && c <=  0x7E)
+                {
+                    char buffer[] = { (char) c, 0 };
+                    sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer);
+                }
+                else
+                {
+                    // Fallback: print as hexadecimal
+                    sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16));
+                }
             }
 
             return TokenType::Invalid;
         }
     }
 
-    Token Lexer::lexToken()
+    Token Lexer::lexToken(LexerFlags extraFlags)
     {
         auto& flags = this->tokenFlags;
         for(;;)
@@ -1228,7 +1249,7 @@ namespace Slang
 
             char const* textBegin = cursor;
 
-            auto tokenType = lexTokenImpl(this);
+            auto tokenType = lexTokenImpl(this, this->lexerFlags | extraFlags);
 
             // The low-level lexer produces tokens for things we want
             // to ignore, such as white space, so we skip them here.
@@ -1342,4 +1363,4 @@ namespace Slang
                 return tokenList;
         }
     }
-}
-\ No newline at end of file
+}
diff --git a/source/slang/lexer.h b/source/slang/lexer.h
index 23eddf04e..1f2954d27 100644
--- a/source/slang/lexer.h
+++ b/source/slang/lexer.h
@@ -64,8 +64,10 @@ namespace Slang
     typedef unsigned int LexerFlags;
     enum
     {
-        kLexerFlag_InDirective      = 1 << 0,
-        kLexerFlag_ExpectFileName   = 1 << 1,
+        kLexerFlag_InDirective      = 1 << 0, ///< Turn end-of-line and end-of-file into end-of-directive
+        kLexerFlag_ExpectFileName   = 1 << 1, ///< Support `<>` style strings for file paths
+        kLexerFlag_IgnoreInvalid    = 1 << 2, ///< Suppress errors about invalid/unsupported characters
+        kLexerFlag_ExpectDirectiveMessage = 1 << 3, ///< Don't lex ordinary tokens, and instead consume rest of line as a string
     };
 
     struct Lexer
@@ -77,7 +79,7 @@ namespace Slang
 
         ~Lexer();
 
-        Token lexToken();
+        Token lexToken(LexerFlags extraFlags = 0);
 
         TokenList lexAllTokens();
 
@@ -121,4 +123,4 @@ namespace Slang
     FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, String* outSuffix = 0);
 }
 
-#endif
-\ No newline at end of file
+#endif
diff --git a/source/slang/preprocessor.cpp b/source/slang/preprocessor.cpp
index 46dbd8fe9..301264632 100644
--- a/source/slang/preprocessor.cpp
+++ b/source/slang/preprocessor.cpp
@@ -225,6 +225,7 @@ static DiagnosticSink* GetSink(Preprocessor* preprocessor)
 
 static void DestroyConditional(PreprocessorConditional* conditional);
 static void DestroyMacro(Preprocessor* preprocessor, PreprocessorMacro* macro);
+static bool IsSkipping(Preprocessor* preprocessor);
 
 //
 // Basic Input Handling
@@ -314,12 +315,12 @@ static void EndInputStream(Preprocessor* preprocessor, PreprocessorInputStream*
 }
 
 // Consume one token from an input stream
-static Token AdvanceRawToken(PreprocessorInputStream* inputStream)
+static Token AdvanceRawToken(PreprocessorInputStream* inputStream, LexerFlags lexerFlags = 0)
 {
     if( auto primaryStream = asPrimaryInputStream(inputStream) )
     {
         auto result = primaryStream->token;
-        primaryStream->token = primaryStream->lexer.lexToken();
+        primaryStream->token = primaryStream->lexer.lexToken(lexerFlags);
         return result;
     }
     else
@@ -359,24 +360,24 @@ static TokenType PeekRawTokenType(PreprocessorInputStream* inputStream)
 
 
 // Read one token in "raw" mode (meaning don't expand macros)
-static Token AdvanceRawToken(Preprocessor* preprocessor)
+static Token AdvanceRawToken(Preprocessor* preprocessor, LexerFlags lexerFlags = 0)
 {
-    for (;;)
+    for(;;)
     {
         // Look at the input stream on top of the stack
         PreprocessorInputStream* inputStream = preprocessor->inputStream;
 
         // If there isn't one, then there is no more input left to read.
-        if (!inputStream)
+        if(!inputStream)
         {
             return preprocessor->endOfFileToken;
         }
 
         // The top-most input stream may be at its end
-        if (PeekRawTokenType(inputStream) == TokenType::EndOfFile)
+        if(PeekRawTokenType(inputStream) == TokenType::EndOfFile)
         {
             // If there is another stream remaining, switch to it
-            if (inputStream->parent)
+            if(inputStream->parent)
             {
                 preprocessor->inputStream = inputStream->parent;
                 EndInputStream(preprocessor, inputStream);
@@ -385,7 +386,9 @@ static Token AdvanceRawToken(Preprocessor* preprocessor)
         }
 
         // Everything worked, so read a token from the top-most stream
-        return AdvanceRawToken(inputStream);
+        return AdvanceRawToken(
+            inputStream,
+            lexerFlags | (IsSkipping(preprocessor) ? kLexerFlag_IgnoreInvalid : 0));
     }
 }
 
@@ -586,7 +589,7 @@ static SimpleTokenInputStream* createSimpleInputStream(
     eofToken.loc = token.loc;
     eofToken.flags = TokenFlag::AfterWhitespace | TokenFlag::AtStartOfLine;
     inputStream->lexedTokens.mTokens.Add(eofToken);
- 
+
     inputStream->tokenReader = TokenReader(inputStream->lexedTokens);
 
     return inputStream;
@@ -953,11 +956,11 @@ static Token PeekRawToken(PreprocessorDirectiveContext* context)
 }
 
 // Read one raw token in a directive, without going past the end of the line.
-static Token AdvanceRawToken(PreprocessorDirectiveContext* context)
+static Token AdvanceRawToken(PreprocessorDirectiveContext* context, LexerFlags lexerFlags = 0)
 {
     if (IsEndOfLine(context))
         return PeekRawToken(context);
-    return AdvanceRawToken(context->preprocessor);
+    return AdvanceRawToken(context->preprocessor, lexerFlags);
 }
 
 // Peek next raw token type, without going past the end of the line.
@@ -1565,6 +1568,9 @@ static void expectEndOfDirective(PreprocessorDirectiveContext* context)
 // Handle a `#include` directive
 static void HandleIncludeDirective(PreprocessorDirectiveContext* context)
 {
+    // Consume the directive, and inform the lexer to process the remainder of the line as a file path.
+    AdvanceRawToken(context, kLexerFlag_ExpectFileName);
+
     Token pathToken;
     if(!Expect(context, TokenType::StringLiteral, Diagnostics::expectedTokenInPreprocessorDirective, &pathToken))
         return;
@@ -1721,17 +1727,29 @@ static void HandleUndefDirective(PreprocessorDirectiveContext* context)
 // Handle a `#warning` directive
 static void HandleWarningDirective(PreprocessorDirectiveContext* context)
 {
-    // TODO: read rest of line without actual tokenization
-    GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, "user-defined warning");
-    SkipToEndOfLine(context);
+    // Consume the directive, and inform the lexer to process the remainder of the line as a custom message.
+    AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage);
+
+    // Read the message token.
+    Token messageToken;
+    Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken);
+
+    // Report the custom warning.
+    GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, messageToken.Content);
 }
 
 // Handle a `#error` directive
 static void HandleErrorDirective(PreprocessorDirectiveContext* context)
 {
-    // TODO: read rest of line without actual tokenization
-    GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, "user-defined warning");
-    SkipToEndOfLine(context);
+    // Consume the directive, and inform the lexer to process the remainder of the line as a custom message.
+    AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage);
+
+    // Read the message token.
+    Token messageToken;
+    Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken);
+
+    // Report the custom error.
+    GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, messageToken.Content);
 }
 
 // Handle a `#line` directive
@@ -1898,6 +1916,11 @@ enum PreprocessorDirectiveFlag : unsigned int
 {
     // Should this directive be handled even when skipping disbaled code?
     ProcessWhenSkipping = 1 << 0,
+
+    /// Allow the handler for this directive to advance past the
+    /// directive token itself, so that it can control lexer behavior
+    /// more closely.
+    DontConsumeDirectiveAutomatically = 1 << 1,
 };
 
 // Information about a specific directive
@@ -1925,11 +1948,11 @@ static const PreprocessorDirective kDirectives[] =
     { "elif",       &HandleElifDirective,       ProcessWhenSkipping },
     { "endif",      &HandleEndIfDirective,      ProcessWhenSkipping },
 
-    { "include",    &HandleIncludeDirective,    0 },
+    { "include",    &HandleIncludeDirective,    DontConsumeDirectiveAutomatically },
     { "define",     &HandleDefineDirective,     0 },
     { "undef",      &HandleUndefDirective,      0 },
-    { "warning",    &HandleWarningDirective,    0 },
-    { "error",      &HandleErrorDirective,      0 },
+    { "warning",    &HandleWarningDirective,    DontConsumeDirectiveAutomatically },
+    { "error",      &HandleErrorDirective,      DontConsumeDirectiveAutomatically },
     { "line",       &HandleLineDirective,       0 },
     { "pragma",     &HandlePragmaDirective,     0 },
 
@@ -1982,9 +2005,6 @@ static void HandleDirective(PreprocessorDirectiveContext* context)
         return;
     }
 
-    // Consume the directive name token.
-    AdvanceRawToken(context);
-
     // Look up the handler for the directive.
     PreprocessorDirective const* directive = FindDirective(GetDirectiveName(context));
 
@@ -1996,6 +2016,12 @@ static void HandleDirective(PreprocessorDirectiveContext* context)
         return;
     }
 
+    if(!(directive->flags & PreprocessorDirectiveFlag::DontConsumeDirectiveAutomatically))
+    {
+        // Consume the directive name token.
+        AdvanceRawToken(context);
+    }
+
     // Apply the directive-specific callback
     (directive->callback)(context);
 
@@ -2009,12 +2035,23 @@ static Token ReadToken(Preprocessor* preprocessor)
 {
     for (;;)
     {
+        // Depending on what the lookahead token is, we
+        // might need to start expanding it.
+        //
+        // Note: doing this at the start of this loop
+        // is important, in case a macro has an empty
+        // expansion, and we end up looking at a different
+        // token after applying the expansion.
+        if(!IsSkipping(preprocessor))
+        {
+            MaybeBeginMacroExpansion(preprocessor);
+        }
+
         // Look at the next raw token in the input.
         Token const& token = PeekRawToken(preprocessor);
         if (token.type == TokenType::EndOfFile)
             return token;
 
-
         // If we have a directive (`#` at start of line) then handle it
         if ((token.type == TokenType::Pound) && (token.flags & TokenFlag::AtStartOfLine))
         {
@@ -2174,7 +2211,7 @@ TokenList preprocessSource(
         {
             sb << " ";
         }
-        
+
         sb << t.Content;
     }
 
diff --git a/source/slang/token-defs.h b/source/slang/token-defs.h
index 9edc1450d..a08084d85 100644
--- a/source/slang/token-defs.h
+++ b/source/slang/token-defs.h
@@ -29,6 +29,7 @@ TOKEN(WhiteSpace,       "whitespace")
 TOKEN(NewLine,          "newline")
 TOKEN(LineComment,      "line comment")
 TOKEN(BlockComment,     "block comment")
+TOKEN(DirectiveMessage, "user-defined message")
 
 #define PUNCTUATION(id, text) \
     TOKEN(id, "'" text "'")
author	Tim Foley <tfoleyNV@users.noreply.github.com>	2018-04-12 17:08:52 -0700
committer	GitHub <noreply@github.com>	2018-04-12 17:08:52 -0700
commit	021a4923f429278b1d7434e01cbf83edcdf43da4 (patch)
tree	7c3c5534de8a98c28554d36c41e3e3c1e1b48aee /source
parent	baf194e7456ba4568dcf11249896af35b3ce18cc (diff)