diff options
| author | Tim Foley <tfoleyNV@users.noreply.github.com> | 2018-04-12 17:08:52 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-04-12 17:08:52 -0700 |
| commit | 021a4923f429278b1d7434e01cbf83edcdf43da4 (patch) | |
| tree | 7c3c5534de8a98c28554d36c41e3e3c1e1b48aee /source | |
| parent | baf194e7456ba4568dcf11249896af35b3ce18cc (diff) | |
Preprocessor cleanups (#484)
* For a `#error` or `#warning`, read the rest of the line as raw text to include in the diagnostic message
* When skipping tokens (e.g., in an `#ifdef`d out block), don't emit errors on invalid characters
* TODO: we could clearly get more efficient and skip whole raw lines in the future
* Fix an issue where a macro invocation that expands to nothing (zero tokens) is the last thing before a directive. The preprocessor was returning the `#` as an ordinary token, because it had already gone past its test for directives.
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/lexer.cpp | 55 | ||||
| -rw-r--r-- | source/slang/lexer.h | 10 | ||||
| -rw-r--r-- | source/slang/preprocessor.cpp | 87 | ||||
| -rw-r--r-- | source/slang/token-defs.h | 1 |
4 files changed, 107 insertions, 46 deletions
diff --git a/source/slang/lexer.cpp b/source/slang/lexer.cpp index 3f8709a9d..87ad6bd5c 100644 --- a/source/slang/lexer.cpp +++ b/source/slang/lexer.cpp @@ -273,7 +273,7 @@ namespace Slang case kEOF: // TODO(tfoley) diagnostic! return; - + case '\n': case '\r': handleNewLine(lexer); continue; @@ -490,7 +490,7 @@ namespace Slang { lexDigits(lexer, base); maybeLexNumberExponent(lexer, base); - + return maybeLexNumberSuffix(lexer, TokenType::FloatingPointLiteral); } @@ -940,20 +940,38 @@ namespace Slang - static TokenType lexTokenImpl(Lexer* lexer) + static TokenType lexTokenImpl(Lexer* lexer, LexerFlags effectiveFlags) { + if(effectiveFlags & kLexerFlag_ExpectDirectiveMessage) + { + for(;;) + { + switch(peek(lexer)) + { + default: + advance(lexer); + continue; + + case kEOF: case '\r': case '\n': + break; + } + break; + } + return TokenType::DirectiveMessage; + } + switch(peek(lexer)) { default: break; case kEOF: - if((lexer->lexerFlags & kLexerFlag_InDirective) != 0) + if((effectiveFlags & kLexerFlag_InDirective) != 0) return TokenType::EndOfDirective; return TokenType::EndOfFile; case '\r': case '\n': - if((lexer->lexerFlags & kLexerFlag_InDirective) != 0) + if((effectiveFlags & kLexerFlag_InDirective) != 0) return TokenType::EndOfDirective; handleNewLine(lexer); return TokenType::NewLine; @@ -1201,24 +1219,27 @@ namespace Slang // unexpected/invalid character. 
auto loc = getSourceLoc(lexer); - auto sink = lexer->sink; int c = advance(lexer); - if(c >= 0x20 && c <= 0x7E) - { - char buffer[] = { (char) c, 0 }; - sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer); - } - else + if(!(effectiveFlags & kLexerFlag_IgnoreInvalid)) { - // Fallback: print as hexadecimal - sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16)); + auto sink = lexer->sink; + if(c >= 0x20 && c <= 0x7E) + { + char buffer[] = { (char) c, 0 }; + sink->diagnose(loc, Diagnostics::illegalCharacterPrint, buffer); + } + else + { + // Fallback: print as hexadecimal + sink->diagnose(loc, Diagnostics::illegalCharacterHex, String((unsigned char)c, 16)); + } } return TokenType::Invalid; } } - Token Lexer::lexToken() + Token Lexer::lexToken(LexerFlags extraFlags) { auto& flags = this->tokenFlags; for(;;) @@ -1228,7 +1249,7 @@ namespace Slang char const* textBegin = cursor; - auto tokenType = lexTokenImpl(this); + auto tokenType = lexTokenImpl(this, this->lexerFlags | extraFlags); // The low-level lexer produces tokens for things we want // to ignore, such as white space, so we skip them here. @@ -1342,4 +1363,4 @@ namespace Slang return tokenList; } } -}
\ No newline at end of file +} diff --git a/source/slang/lexer.h b/source/slang/lexer.h index 23eddf04e..1f2954d27 100644 --- a/source/slang/lexer.h +++ b/source/slang/lexer.h @@ -64,8 +64,10 @@ namespace Slang typedef unsigned int LexerFlags; enum { - kLexerFlag_InDirective = 1 << 0, - kLexerFlag_ExpectFileName = 1 << 1, + kLexerFlag_InDirective = 1 << 0, ///< Turn end-of-line and end-of-file into end-of-directive + kLexerFlag_ExpectFileName = 1 << 1, ///< Support `<>` style strings for file paths + kLexerFlag_IgnoreInvalid = 1 << 2, ///< Suppress errors about invalid/unsupported characters + kLexerFlag_ExpectDirectiveMessage = 1 << 3, ///< Don't lexer ordinary tokens, and instead consume rest of line as a string }; struct Lexer @@ -77,7 +79,7 @@ namespace Slang ~Lexer(); - Token lexToken(); + Token lexToken(LexerFlags extraFlags = 0); TokenList lexAllTokens(); @@ -121,4 +123,4 @@ namespace Slang FloatingPointLiteralValue getFloatingPointLiteralValue(Token const& token, String* outSuffix = 0); } -#endif
\ No newline at end of file +#endif diff --git a/source/slang/preprocessor.cpp b/source/slang/preprocessor.cpp index 46dbd8fe9..301264632 100644 --- a/source/slang/preprocessor.cpp +++ b/source/slang/preprocessor.cpp @@ -225,6 +225,7 @@ static DiagnosticSink* GetSink(Preprocessor* preprocessor) static void DestroyConditional(PreprocessorConditional* conditional); static void DestroyMacro(Preprocessor* preprocessor, PreprocessorMacro* macro); +static bool IsSkipping(Preprocessor* preprocessor); // // Basic Input Handling @@ -314,12 +315,12 @@ static void EndInputStream(Preprocessor* preprocessor, PreprocessorInputStream* } // Consume one token from an input stream -static Token AdvanceRawToken(PreprocessorInputStream* inputStream) +static Token AdvanceRawToken(PreprocessorInputStream* inputStream, LexerFlags lexerFlags = 0) { if( auto primaryStream = asPrimaryInputStream(inputStream) ) { auto result = primaryStream->token; - primaryStream->token = primaryStream->lexer.lexToken(); + primaryStream->token = primaryStream->lexer.lexToken(lexerFlags); return result; } else @@ -359,24 +360,24 @@ static TokenType PeekRawTokenType(PreprocessorInputStream* inputStream) // Read one token in "raw" mode (meaning don't expand macros) -static Token AdvanceRawToken(Preprocessor* preprocessor) +static Token AdvanceRawToken(Preprocessor* preprocessor, LexerFlags lexerFlags = 0) { - for (;;) + for(;;) { // Look at the input stream on top of the stack PreprocessorInputStream* inputStream = preprocessor->inputStream; // If there isn't one, then there is no more input left to read. 
- if (!inputStream) + if(!inputStream) { return preprocessor->endOfFileToken; } // The top-most input stream may be at its end - if (PeekRawTokenType(inputStream) == TokenType::EndOfFile) + if(PeekRawTokenType(inputStream) == TokenType::EndOfFile) { // If there is another stream remaining, switch to it - if (inputStream->parent) + if(inputStream->parent) { preprocessor->inputStream = inputStream->parent; EndInputStream(preprocessor, inputStream); @@ -385,7 +386,9 @@ static Token AdvanceRawToken(Preprocessor* preprocessor) } // Everything worked, so read a token from the top-most stream - return AdvanceRawToken(inputStream); + return AdvanceRawToken( + inputStream, + lexerFlags | (IsSkipping(preprocessor) ? kLexerFlag_IgnoreInvalid : 0)); } } @@ -586,7 +589,7 @@ static SimpleTokenInputStream* createSimpleInputStream( eofToken.loc = token.loc; eofToken.flags = TokenFlag::AfterWhitespace | TokenFlag::AtStartOfLine; inputStream->lexedTokens.mTokens.Add(eofToken); - + inputStream->tokenReader = TokenReader(inputStream->lexedTokens); return inputStream; @@ -953,11 +956,11 @@ static Token PeekRawToken(PreprocessorDirectiveContext* context) } // Read one raw token in a directive, without going past the end of the line. -static Token AdvanceRawToken(PreprocessorDirectiveContext* context) +static Token AdvanceRawToken(PreprocessorDirectiveContext* context, LexerFlags lexerFlags = 0) { if (IsEndOfLine(context)) return PeekRawToken(context); - return AdvanceRawToken(context->preprocessor); + return AdvanceRawToken(context->preprocessor, lexerFlags); } // Peek next raw token type, without going past the end of the line. @@ -1565,6 +1568,9 @@ static void expectEndOfDirective(PreprocessorDirectiveContext* context) // Handle a `#include` directive static void HandleIncludeDirective(PreprocessorDirectiveContext* context) { + // Consume the directive, and inform the lexer to process the remainder of the line as a file path. 
+ AdvanceRawToken(context, kLexerFlag_ExpectFileName); + Token pathToken; if(!Expect(context, TokenType::StringLiteral, Diagnostics::expectedTokenInPreprocessorDirective, &pathToken)) return; @@ -1721,17 +1727,29 @@ static void HandleUndefDirective(PreprocessorDirectiveContext* context) // Handle a `#warning` directive static void HandleWarningDirective(PreprocessorDirectiveContext* context) { - // TODO: read rest of line without actual tokenization - GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, "user-defined warning"); - SkipToEndOfLine(context); + // Consume the directive, and inform the lexer to process the remainder of the line as a custom message. + AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage); + + // Read the message token. + Token messageToken; + Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken); + + // Report the custom error. + GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, messageToken.Content); } // Handle a `#error` directive static void HandleErrorDirective(PreprocessorDirectiveContext* context) { - // TODO: read rest of line without actual tokenization - GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, "user-defined warning"); - SkipToEndOfLine(context); + // Consume the directive, and inform the lexer to process the remainder of the line as a custom message. + AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage); + + // Read the message token. + Token messageToken; + Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken); + + // Report the custom error. 
+ GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, messageToken.Content); } // Handle a `#line` directive @@ -1898,6 +1916,11 @@ enum PreprocessorDirectiveFlag : unsigned int { // Should this directive be handled even when skipping disbaled code? ProcessWhenSkipping = 1 << 0, + + /// Allow the handler for this directive to advance past the + /// directive token itself, so that it can control lexer behavior + /// more closely. + DontConsumeDirectiveAutomatically = 1 << 1, }; // Information about a specific directive @@ -1925,11 +1948,11 @@ static const PreprocessorDirective kDirectives[] = { "elif", &HandleElifDirective, ProcessWhenSkipping }, { "endif", &HandleEndIfDirective, ProcessWhenSkipping }, - { "include", &HandleIncludeDirective, 0 }, + { "include", &HandleIncludeDirective, DontConsumeDirectiveAutomatically }, { "define", &HandleDefineDirective, 0 }, { "undef", &HandleUndefDirective, 0 }, - { "warning", &HandleWarningDirective, 0 }, - { "error", &HandleErrorDirective, 0 }, + { "warning", &HandleWarningDirective, DontConsumeDirectiveAutomatically }, + { "error", &HandleErrorDirective, DontConsumeDirectiveAutomatically }, { "line", &HandleLineDirective, 0 }, { "pragma", &HandlePragmaDirective, 0 }, @@ -1982,9 +2005,6 @@ static void HandleDirective(PreprocessorDirectiveContext* context) return; } - // Consume the directive name token. - AdvanceRawToken(context); - // Look up the handler for the directive. PreprocessorDirective const* directive = FindDirective(GetDirectiveName(context)); @@ -1996,6 +2016,12 @@ static void HandleDirective(PreprocessorDirectiveContext* context) return; } + if(!(directive->flags & PreprocessorDirectiveFlag::DontConsumeDirectiveAutomatically)) + { + // Consume the directive name token. 
+ AdvanceRawToken(context); + } + // Apply the directive-specific callback (directive->callback)(context); @@ -2009,12 +2035,23 @@ static Token ReadToken(Preprocessor* preprocessor) { for (;;) { + // Depending on what the lookahead token is, we + // might need to start expanding it. + // + // Note: doing this at the start of this loop + // is important, in case a macro has an empty + // expansion, and we end up looking at a different + // token after applying the expansion. + if(!IsSkipping(preprocessor)) + { + MaybeBeginMacroExpansion(preprocessor); + } + // Look at the next raw token in the input. Token const& token = PeekRawToken(preprocessor); if (token.type == TokenType::EndOfFile) return token; - // If we have a directive (`#` at start of line) then handle it if ((token.type == TokenType::Pound) && (token.flags & TokenFlag::AtStartOfLine)) { @@ -2174,7 +2211,7 @@ TokenList preprocessSource( { sb << " "; } - + sb << t.Content; } diff --git a/source/slang/token-defs.h b/source/slang/token-defs.h index 9edc1450d..a08084d85 100644 --- a/source/slang/token-defs.h +++ b/source/slang/token-defs.h @@ -29,6 +29,7 @@ TOKEN(WhiteSpace, "whitespace") TOKEN(NewLine, "newline") TOKEN(LineComment, "line comment") TOKEN(BlockComment, "block comment") +TOKEN(DirectiveMessage, "user-defined message") #define PUNCTUATION(id, text) \ TOKEN(id, "'" text "'") |
