diff options
47 files changed, 2482 insertions, 1350 deletions
diff --git a/source/compiler-core/slang-lexer.cpp b/source/compiler-core/slang-lexer.cpp index ab60edd97..653c43dba 100644 --- a/source/compiler-core/slang-lexer.cpp +++ b/source/compiler-core/slang-lexer.cpp @@ -38,8 +38,9 @@ namespace Slang TokenReader::TokenReader() : m_cursor(nullptr) , m_end (nullptr) - {} - + { + _updateLookaheadToken(); + } Token& TokenReader::peekToken() { @@ -58,18 +59,33 @@ namespace Slang Token TokenReader::advanceToken() { - if (!m_cursor) - return getEndOfFileToken(); - - Token token = m_nextToken; - if (m_cursor < m_end) - { + Token result = m_nextToken; + if (m_cursor != m_end) m_cursor++; - m_nextToken = *m_cursor; - } - else + _updateLookaheadToken(); + return result; + } + + void TokenReader::_updateLookaheadToken() + { + // We assume here that we can read a token from a non-null `m_cursor` + // *even* in the case where `m_cursor == m_end`, because the invariant + // for lists of tokens is that they should be terminated with and + // end-of-file token, so that there is always a token "one past the end." + // + m_nextToken = m_cursor ? *m_cursor : getEndOfFileToken(); + + // If the token we read came from the end of the sub-sequence we are + // reading, then we will change the token type to an end-of-file token + // so that code that reads from the sequence and expects a terminating + // EOF will find it. + // + // TODO: We might eventually want a way to look at the actual token type + // and not just use EOF in all cases: e.g., when emitting diagnostic + // messages that include the token that is seen. 
+ // + if(m_cursor == m_end) m_nextToken.type = TokenType::EndOfFile; - return token; } // Lexer @@ -78,8 +94,7 @@ namespace Slang SourceView* sourceView, DiagnosticSink* sink, NamePool* namePool, - MemoryArena* memoryArena, - OptionFlags optionFlags) + MemoryArena* memoryArena) { m_sourceView = sourceView; m_sink = sink; @@ -95,9 +110,12 @@ namespace Slang // Set the start location m_startLoc = sourceView->getRange().begin; + // The first token read from a translation unit should be considered to be at + // the start of a line, and *also* as coming after whitespace (conceptually + // both the end-of-file and beginning-of-file pseudo-tokens are whitespace). + // m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; m_lexerFlags = 0; - m_optionFlags = optionFlags; } Lexer::~Lexer() @@ -331,7 +349,7 @@ namespace Slang return lexer->m_startLoc + (lexer->m_cursor - lexer->m_begin); } - static void _lexDigits(Lexer* lexer, int base, LexerFlags flags) + static void _lexDigits(Lexer* lexer, int base) { for(;;) { @@ -362,7 +380,7 @@ namespace Slang if(digitVal >= base) { - if (auto sink = lexer->getDiagnosticSink(flags)) + if (auto sink = lexer->getDiagnosticSink()) { char buffer[] = { (char) c, 0 }; sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::invalidDigitForBase, buffer, base); @@ -418,7 +436,7 @@ namespace Slang return true; } - static bool _maybeLexNumberExponent(Lexer* lexer, int base, LexerFlags flags) + static bool _maybeLexNumberExponent(Lexer* lexer, int base) { if(!_isNumberExponent(_peek(lexer), base)) return false; @@ -436,37 +454,37 @@ namespace Slang // TODO(tfoley): it would be an error to not see digits here... 
- _lexDigits(lexer, 10, flags); + _lexDigits(lexer, 10); return true; } - static TokenType _lexNumberAfterDecimalPoint(Lexer* lexer, int base, LexerFlags flags) + static TokenType _lexNumberAfterDecimalPoint(Lexer* lexer, int base) { - _lexDigits(lexer, base, flags); - _maybeLexNumberExponent(lexer, base, flags); + _lexDigits(lexer, base); + _maybeLexNumberExponent(lexer, base); return _maybeLexNumberSuffix(lexer, TokenType::FloatingPointLiteral); } - static TokenType _lexNumber(Lexer* lexer, int base, LexerFlags flags) + static TokenType _lexNumber(Lexer* lexer, int base) { // TODO(tfoley): Need to consider whether to allow any kind of digit separator character. TokenType tokenType = TokenType::IntegerLiteral; // At the start of things, we just concern ourselves with digits - _lexDigits(lexer, base, flags); + _lexDigits(lexer, base); if( _peek(lexer) == '.' ) { tokenType = TokenType::FloatingPointLiteral; _advance(lexer); - _lexDigits(lexer, base, flags); + _lexDigits(lexer, base); } - if( _maybeLexNumberExponent(lexer, base, flags)) + if( _maybeLexNumberExponent(lexer, base)) { tokenType = TokenType::FloatingPointLiteral; } @@ -669,7 +687,7 @@ namespace Slang return value; } - static void _lexStringLiteralBody(Lexer* lexer, char quote, LexerFlags flags) + static void _lexStringLiteralBody(Lexer* lexer, char quote) { for(;;) { @@ -683,14 +701,14 @@ namespace Slang switch(c) { case kEOF: - if (auto sink = lexer->getDiagnosticSink(flags)) + if (auto sink = lexer->getDiagnosticSink()) { sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::endOfFileInLiteral); } return; case '\n': case '\r': - if (auto sink = lexer->getDiagnosticSink(flags)) + if (auto sink = lexer->getDiagnosticSink()) { sink->diagnose(_getSourceLoc(lexer), LexerDiagnostics::newlineInLiteral); } @@ -907,41 +925,17 @@ namespace Slang return String(content.begin() + 1, content.end() - 1); } - - - static TokenType _lexTokenImpl(Lexer* lexer, LexerFlags effectiveFlags) + static TokenType 
_lexTokenImpl(Lexer* lexer) { - if(effectiveFlags & kLexerFlag_ExpectDirectiveMessage) - { - for(;;) - { - switch(_peek(lexer)) - { - default: - _advance(lexer); - continue; - - case kEOF: case '\r': case '\n': - break; - } - break; - } - return TokenType::DirectiveMessage; - } - switch(_peek(lexer)) { default: break; case kEOF: - if((effectiveFlags & kLexerFlag_InDirective) != 0) - return TokenType::EndOfDirective; return TokenType::EndOfFile; case '\r': case '\n': - if((effectiveFlags & kLexerFlag_InDirective) != 0) - return TokenType::EndOfDirective; _handleNewLine(lexer); return TokenType::NewLine; @@ -955,7 +949,7 @@ namespace Slang { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return _lexNumberAfterDecimalPoint(lexer, 10, effectiveFlags); + return _lexNumberAfterDecimalPoint(lexer, 10); // TODO(tfoley): handle ellipsis (`...`) @@ -965,7 +959,7 @@ namespace Slang case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return _lexNumber(lexer, 10, effectiveFlags); + return _lexNumber(lexer, 10); case '0': { @@ -978,23 +972,23 @@ namespace Slang case '.': _advance(lexer); - return _lexNumberAfterDecimalPoint(lexer, 10, effectiveFlags); + return _lexNumberAfterDecimalPoint(lexer, 10); case 'x': case 'X': _advance(lexer); - return _lexNumber(lexer, 16, effectiveFlags); + return _lexNumber(lexer, 16); case 'b': case 'B': _advance(lexer); - return _lexNumber(lexer, 2, effectiveFlags); + return _lexNumber(lexer, 2); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - if (auto sink = lexer->getDiagnosticSink(effectiveFlags)) + if (auto sink = lexer->getDiagnosticSink()) { sink->diagnose(loc, LexerDiagnostics::octalLiteral); } - return _lexNumber(lexer, 8, effectiveFlags); + return _lexNumber(lexer, 8); } } @@ -1016,12 +1010,12 @@ namespace Slang case '\"': _advance(lexer); - _lexStringLiteralBody(lexer, '\"', effectiveFlags); 
+ _lexStringLiteralBody(lexer, '\"'); return TokenType::StringLiteral; case '\'': _advance(lexer); - _lexStringLiteralBody(lexer, '\'', effectiveFlags); + _lexStringLiteralBody(lexer, '\''); return TokenType::CharLiteral; case '+': @@ -1202,7 +1196,7 @@ namespace Slang auto loc = _getSourceLoc(lexer); int c = _advance(lexer); - if (auto sink = lexer->getDiagnosticSink(effectiveFlags)) + if (auto sink = lexer->getDiagnosticSink()) { if(c >= 0x20 && c <= 0x7E) { @@ -1220,9 +1214,8 @@ namespace Slang } } - Token Lexer::lexToken(LexerFlags extraFlags) + Token Lexer::lexToken() { - auto& flags = m_tokenFlags; for(;;) { Token token; @@ -1230,73 +1223,54 @@ namespace Slang char const* textBegin = m_cursor; - auto tokenType = _lexTokenImpl(this, m_lexerFlags | extraFlags); + auto tokenType = _lexTokenImpl(this); - // The low-level lexer produces tokens for things we want - // to ignore, such as white space, so we skip them here. + // The flags on the token we just lexed will be based + // on the current state of the lexer. + // + auto tokenFlags = m_tokenFlags; + // + // Depending on what kind of token we just lexed, the + // flags that will be used for the *next* token might + // need to be updated. + // switch(tokenType) { - case TokenType::Invalid: - flags = 0; - continue; - case TokenType::NewLine: - flags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; - continue; + { + // If we just reached the end of a line, then the next token + // should count as being at the start of a line, and also after + // whitespace. 
+ // + m_tokenFlags = TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; + break; + } case TokenType::WhiteSpace: - { - flags |= TokenFlag::AfterWhitespace; - continue; - } case TokenType::BlockComment: case TokenType::LineComment: - { - flags |= TokenFlag::AfterWhitespace; - if (m_optionFlags & OptionFlag::TokenizeComments) { - // We don't break here, and use the normal token adding logic - // because we want the behavior to be identical (in terms of flags etc) - // as if TokenizeComments is not enabled - char const* textEnd = m_cursor; - - token.type = tokenType; - token.flags = m_tokenFlags; - token.setContent(UnownedStringSlice(textBegin, textEnd)); - - return token; + // True horizontal whitespace and comments both count as whitespace. + // + // Note that a line comment does not include the terminating newline, + // we do not need to set `AtStartOfLine` here. + // + m_tokenFlags |= TokenFlag::AfterWhitespace; + break; } - - continue; - } - // We don't want to skip the end-of-file token, but we *do* - // want to make sure it has appropriate flags to make our life easier - case TokenType::EndOfFile: - flags |= TokenFlag::AtStartOfLine | TokenFlag::AfterWhitespace; - break; - - // We will also do some book-keeping around preprocessor directives here: - // - // If we see a `#` at the start of a line, then we are entering a - // preprocessor directive. - case TokenType::Pound: - if((flags & TokenFlag::AtStartOfLine) != 0) - m_lexerFlags |= kLexerFlag_InDirective; - break; - // - // And if we saw an end-of-line during a directive, then we are - // now leaving that directive. - // - case TokenType::EndOfDirective: - m_lexerFlags &= ~kLexerFlag_InDirective; - break; - default: - break; + { + // If we read some token other then the above cases, then we are + // neither after whitespace nor at the start of a line. 
+ // + m_tokenFlags = 0; + break; + } } token.type = tokenType; + token.flags = tokenFlags; char const* textEnd = m_cursor; @@ -1308,7 +1282,7 @@ namespace Slang // Only perform this work if we encountered an escaped newline // while lexing this token (e.g., keep a flag on the lexer), or // do it on-demand when the actual value of the token is needed. - if (m_tokenFlags & TokenFlag::ScrubbingNeeded) + if (tokenFlags & TokenFlag::ScrubbingNeeded) { // Allocate space that will always be more than enough for stripped contents char* startDst = (char*)m_memoryArena->allocateUnaligned(textEnd - textBegin); @@ -1348,10 +1322,6 @@ namespace Slang } } - token.flags = flags; - - m_tokenFlags = 0; - if (tokenType == TokenType::Identifier) { token.setName(m_namePool->getName(token.getContent())); @@ -1361,14 +1331,52 @@ namespace Slang } } - TokenList Lexer::lexAllTokens() + TokenList Lexer::lexAllSemanticTokens() { TokenList tokenList; for(;;) { Token token = lexToken(); + + // We are only interested intokens that are semantically + // significant, so we will skip over forms of whitespace + // and comments. 
+ // + switch( token.type ) + { + default: + break; + + case TokenType::WhiteSpace: + case TokenType::BlockComment: + case TokenType::LineComment: + case TokenType::NewLine: + continue; + } + tokenList.add(token); + if(token.type == TokenType::EndOfFile) + return tokenList; + } + } + + TokenList Lexer::lexAllMarkupTokens() + { + TokenList tokenList; + for(;;) + { + Token token = lexToken(); + switch( token.type ) + { + default: + break; + case TokenType::WhiteSpace: + case TokenType::NewLine: + continue; + } + + tokenList.add(token); if(token.type == TokenType::EndOfFile) return tokenList; } diff --git a/source/compiler-core/slang-lexer.h b/source/compiler-core/slang-lexer.h index 3c8d4ca37..23458d396 100644 --- a/source/compiler-core/slang-lexer.h +++ b/source/compiler-core/slang-lexer.h @@ -45,13 +45,21 @@ namespace Slang explicit TokenReader(TokenSpan const& tokens) : m_cursor(tokens.begin()) , m_end (tokens.end ()) - , m_nextToken(tokens.begin() ? *tokens.begin() : getEndOfFileToken()) - {} + { + _updateLookaheadToken(); + } explicit TokenReader(TokenList const& tokens) : m_cursor(tokens.begin()) , m_end (tokens.end ()) - , m_nextToken(tokens.begin() ? 
*tokens.begin() : getEndOfFileToken()) - {} + { + _updateLookaheadToken(); + } + explicit TokenReader(Token const* begin, Token const* end) + : m_cursor(begin) + , m_end (end) + { + _updateLookaheadToken(); + } struct ParsingCursor { Token nextToken; @@ -85,34 +93,25 @@ namespace Slang const Token* m_cursor; const Token* m_end; static Token getEndOfFileToken(); + + private: + /// Update the lookahead token in `m_nextToken` to reflect the cursor state + void _updateLookaheadToken(); }; typedef unsigned int LexerFlags; enum { - kLexerFlag_InDirective = 1 << 0, ///< Turn end-of-line and end-of-file into end-of-directive - kLexerFlag_ExpectFileName = 1 << 1, ///< Support `<>` style strings for file paths - kLexerFlag_IgnoreInvalid = 1 << 2, ///< Suppress errors about invalid/unsupported characters - kLexerFlag_ExpectDirectiveMessage = 1 << 3, ///< Don't lexer ordinary tokens, and instead consume rest of line as a string + kLexerFlag_SuppressDiagnostics = 1 << 2, ///< Suppress errors about invalid/unsupported characters }; struct Lexer { - typedef uint32_t OptionFlags; - struct OptionFlag - { - enum Enum : OptionFlags - { - TokenizeComments = 1 << 0, ///< If set comments will be output to the token stream - }; - }; - void initialize( SourceView* sourceView, DiagnosticSink* sink, NamePool* namePool, - MemoryArena* memoryArena, - OptionFlags optionFlags = 0); + MemoryArena* memoryArena); ~Lexer(); @@ -126,12 +125,20 @@ namespace Slang /// not needed by the DiagnosticSink. static UnownedStringSlice sourceLocationLexer(const UnownedStringSlice& in); - Token lexToken(LexerFlags extraFlags = 0); + /// Lex the next token in the input stream, returning an EOF token if at end. + Token lexToken(); - TokenList lexAllTokens(); + /// Lex all tokens (up to the end of the stream) that are semantically relevant + TokenList lexAllSemanticTokens(); - /// Get the diagnostic sink, taking into account flags. 
Can return nullptr if ignoring invalid - DiagnosticSink* getDiagnosticSink(LexerFlags flags) { return ((flags & kLexerFlag_IgnoreInvalid) == 0) ? m_sink : nullptr; } + /// Lex all tokens (up to the end of the stream) that are relevant to things like markup + TokenList lexAllMarkupTokens(); + + /// Get the diagnostic sink, taking into account flags. Will return null if suppressing diagnostics. + DiagnosticSink* getDiagnosticSink() + { + return ((m_lexerFlags & kLexerFlag_SuppressDiagnostics) == 0) ? m_sink : nullptr; + } SourceView* m_sourceView; DiagnosticSink* m_sink; @@ -147,7 +154,6 @@ namespace Slang TokenFlags m_tokenFlags; LexerFlags m_lexerFlags; - OptionFlags m_optionFlags; MemoryArena* m_memoryArena; }; diff --git a/source/compiler-core/slang-token-defs.h b/source/compiler-core/slang-token-defs.h index 6cece330e..485429e28 100644 --- a/source/compiler-core/slang-token-defs.h +++ b/source/compiler-core/slang-token-defs.h @@ -18,7 +18,6 @@ TOKEN(Unknown, "<unknown>") TOKEN(EndOfFile, "end of file") -TOKEN(EndOfDirective, "end of line") TOKEN(Invalid, "invalid character") TOKEN(Identifier, "identifier") TOKEN(IntegerLiteral, "integer literal") @@ -26,10 +25,9 @@ TOKEN(FloatingPointLiteral, "floating-point literal") TOKEN(StringLiteral, "string literal") TOKEN(CharLiteral, "character literal") TOKEN(WhiteSpace, "whitespace") -TOKEN(NewLine, "newline") +TOKEN(NewLine, "end of line") TOKEN(LineComment, "line comment") TOKEN(BlockComment, "block comment") -TOKEN(DirectiveMessage, "user-defined message") #define PUNCTUATION(id, text) \ TOKEN(id, "'" text "'") diff --git a/source/compiler-core/slang-token.h b/source/compiler-core/slang-token.h index 9697a5c2d..7feda6824 100644 --- a/source/compiler-core/slang-token.h +++ b/source/compiler-core/slang-token.h @@ -26,9 +26,8 @@ struct TokenFlag { AtStartOfLine = 1 << 0, AfterWhitespace = 1 << 1, - SuppressMacroExpansion = 1 << 2, - ScrubbingNeeded = 1 << 3, - Name = 1 << 4, ///< Determines if 'name' is set or 'chars' 
in the charsNameUnion + ScrubbingNeeded = 1 << 2, + Name = 1 << 3, ///< Determines if 'name' is set or 'chars' in the charsNameUnion }; }; diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index e2f77aa4b..4c10ff5d5 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -168,11 +168,18 @@ DIAGNOSTIC(15401, Warning, macroNotDefined, "macro '$0' is not defined") DIAGNOSTIC(15403, Error, expectedTokenInMacroParameters, "expected '$0' in macro parameters") DIAGNOSTIC(15404, Warning, builtinMacroRedefinition, "Redefinition of builtin macro '$0'") +DIAGNOSTIC(15405, Error, tokenPasteAtStart, "'##' is not allowed at the start of a macro body") +DIAGNOSTIC(15406, Error, tokenPasteAtEnd, "'##' is not allowed at the end of a macro body") +DIAGNOSTIC(15407, Error, expectedMacroParameterAfterStringize, "'#' in macro body must be followed by the name of a macro parameter") +DIAGNOSTIC(15408, Error, duplicateMacroParameterName, "redefinition of macro parameter '$0'") + // 155xx - macro expansion DIAGNOSTIC(15500, Warning, expectedTokenInMacroArguments, "expected '$0' in macro invocation") DIAGNOSTIC(15501, Error, wrongNumberOfArgumentsToMacro, "wrong number of arguments to macro (expected $0, got $1)") DIAGNOSTIC(15502, Error, errorParsingToMacroInvocationArgument, "error parsing macro '$0' invocation argument to '$1'") +DIAGNOSTIC(15503, Warning, invalidTokenPasteResult, "toking pasting with '##' resulted in the invalid token '$0'") + // 156xx - pragmas DIAGNOSTIC(15600, Error, expectedPragmaDirectiveName, "expected a name after '#pragma'") DIAGNOSTIC(15601, Warning, unknownPragmaDirectiveIgnored, "ignoring unknown directive '#pragma $0'") diff --git a/source/slang/slang-doc-extractor.cpp b/source/slang/slang-doc-extractor.cpp index 3951e4977..97667140e 100644 --- a/source/slang/slang-doc-extractor.cpp +++ b/source/slang/slang-doc-extractor.cpp @@ -834,10 +834,10 @@ SlangResult 
DocMarkupExtractor::extract(const SearchItemInput* inputs, Index inp // Run the lexer Lexer lexer; - lexer.initialize(sourceView, sink, &namePool, &memoryArena, Lexer::OptionFlag::TokenizeComments); + lexer.initialize(sourceView, sink, &namePool, &memoryArena); // Lex everything - tokens = lexer.lexAllTokens(); + tokens = lexer.lexAllMarkupTokens(); // Let's work out the access diff --git a/source/slang/slang-options.cpp b/source/slang/slang-options.cpp index 8dcaa4d08..4566bf5ec 100644 --- a/source/slang/slang-options.cpp +++ b/source/slang/slang-options.cpp @@ -1638,6 +1638,10 @@ struct OptionsParser { case CodeGenTarget::CPPSource: case CodeGenTarget::PTX: + case CodeGenTarget::CUDASource: + case CodeGenTarget::HostCallable: + case CodeGenTarget::Executable: + case CodeGenTarget::SharedLibrary: rawOutput.isWholeProgram = true; break; default: diff --git a/source/slang/slang-preprocessor.cpp b/source/slang/slang-preprocessor.cpp index 7bee5afd2..d54b35d0b 100644 --- a/source/slang/slang-preprocessor.cpp +++ b/source/slang/slang-preprocessor.cpp @@ -1,198 +1,648 @@ // slang-preprocessor.cpp #include "slang-preprocessor.h" +// This file implements a C/C++-style preprocessor. While it does not aim for 100% +// compatibility with the preprocessor for those languages, it does strive to provide +// the same semantics in most cases users will care about around macros, toking pasting, etc. +// +// The main conceptual difference from a fully C-compatible preprocessor is that +// we do *not* implement distinct tokenization/lexing rules for the preprocessor and +// later compiler stages. Instead, our preprocessor uses the same lexer as the rest +// of the compiler, and operates as logical transformation from one stream of tokens +// to another. 
+ #include "slang-compiler.h" #include "slang-diagnostics.h" #include "../compiler-core/slang-lexer.h" -// Needed so that we can construct modifier syntax to represent GLSL directives -#include "slang-syntax.h" #include <assert.h> -// This file provides an implementation of a simple C-style preprocessor. -// It does not aim for 100% compatibility with any particular preprocessor -// specification, but the goal is to have it accept the most common -// idioms for using the preprocessor, found in shader code in the wild. - namespace Slang { -// State of a preprocessor conditional, which can change when -// we encounter directives like `#elif` or `#endif` -enum class PreprocessorConditionalState +// +// PreprocessorHandler +// + +// The `PreprocessorHandler` interface allows other layers of the compiler to intercept +// important events during preprocessing. The following are the default (empty) implementations +// of the callbacks. + +void PreprocessorHandler::handleEndOfTranslationUnit(Preprocessor* preprocessor) { - Before, // We have not yet seen a branch with a `true` condition. - During, // We are inside the branch with a `true` condition. - After, // We have already seen the branch with a `true` condition. -}; + SLANG_UNUSED(preprocessor); +} -// Represents a preprocessor conditional that we are currently -// nested inside. -struct PreprocessorConditional +void PreprocessorHandler::handleFileDependency(String const& path) { - // The next outer conditional in the current file/stream, or NULL. - PreprocessorConditional* parent; + SLANG_UNUSED(path); +} - // The directive token that started the conditional (an `#if` or `#ifdef`) - Token ifToken; +// In order to simplify the naming scheme, we will nest the implementation of the +// preprocessor under an additional namespace, so that we can have, e.g., +// `MacroDefinition` instead of `PreprocessorMacroDefinition`.
+// +namespace preprocessor +{ - // The `#else` directive token, if one has been seen (otherwise `TokenType::Unknown`) - Token elseToken; +// +// Forward Declarations +// - // The state of the conditional - PreprocessorConditionalState state; -}; +struct MacroDefinition; +struct MacroInvocation; -struct PreprocessorMacro; +// +// Utility Types +// - /// A node in a linked list of macros that are "busy" in an environment. + /// A preprocessor conditional construct that is currently active. + /// + /// This type handles preprocessor conditional structures like + /// `#if` / `#elif` / `#endif`. A single top-level input file + /// will have some number of "active" conditionals at one time, + /// based on the nesting depth of those conditional structures. /// - /// A macro is "busy" if there is already an open expansion of it in - /// the same (or a parent) environment, such that expanding it again - /// in the environment would lead to infinite expansion. + /// Each conditional may be in a distinct state, which decides + /// whether tokens should be skipped or not. /// -struct BusyMacro +struct Conditional +{ + /// A state that a preprocessor conditional can be in. + /// + /// The state of a conditional depends both on what directives + /// have been encountered so far (e.g., just an `#if`, or an + /// `#if` and then an `#else`), as well as what the value + /// of any conditions related to those directives have been. + /// + enum class State + { + /// Indicates that this conditional construct has not yet encountered a branch with a `true` condition. + /// + /// The preprocessor should skip tokens, but should keep scanning and evaluating branch conditions. + Before, + + /// Indicates that this conditional construct is nested inside the branch with a `true` condition + /// + /// The preprocessor should not skip tokens, and should not bother evaluating subsequent branch conditions. 
+ During, + + /// Indicates that this conditional has already seen the branch with a `true` condition + /// + /// The preprocessor should skip tokens, and should not bother evaluating subsequent branch conditions. + After, + }; + + /// The next outer conditional in the current input file, or NULL if this is the outer-most conditional. + Conditional* parent; + + /// The token that started the conditional (e.g., an `#if` or `#ifdef`) + Token ifToken; + + /// The `#else` directive token, if one has been seen (otherwise has `TokenType::Unknown`) + Token elseToken; + + /// The state of the conditional + State state; +}; + + /// An environment used for mapping macro names to their definitions during preprocessing. + /// +struct Environment { - /// The macro that is busy. - PreprocessorMacro* macro = nullptr; + /// The "outer" environment, to be used if lookup in this env fails + Environment* parent = NULL; + + /// Macros defined in this environment + Dictionary<Name*, MacroDefinition*> macros; - /// The rest of the list of busy macros. - BusyMacro* next = nullptr; + /// Clean up the environment, releasing all macros allocated into it + ~Environment(); }; -struct PreprocessorEnvironment +// +// Input Streams +// + +// A fundamental action in the preprocessor is to transform a stream of +// input tokens to produce a stream of output tokens. The term "macro expansion" +// is used to describe two inter-related transformations of this kind: +// +// * Given an invocation of a macro `M`, we can "play back" the tokens in the +// definition of `M` to produce a stream of tokens, potentially substituting +// in argument values for parameters, pasting tokens, etc. +// +// * Given an input stream, we can scan its tokens looking for macro invocations, +// and upon finding them expand those invocations using the first approach +// outlined here.
+// +// In practice, the second kind of expansion needs to abstract over where it +// is reading tokens from: an input file, an existing macro invocation, etc. +// In order to support reading from streams of tokens without knowing their +// exact implementation, we will define an abstract base class for input +// streams. + + /// A logical stream of tokens. +struct InputStream { - // The "outer" environment, to be used if lookup in this env fails - PreprocessorEnvironment* parent = NULL; + /// Initialize an input stream, and associate with a specific `preprocessor` + InputStream(Preprocessor* preprocessor) + : m_preprocessor(preprocessor) + {} - /// Macros that should be considered busy in this environment - BusyMacro* busyMacros = nullptr; + // The two fundamental operations that every input stream must support + // are reading one token from the stream, and "peeking" one token into + // the stream to see what will be read next. - // Macros defined in this environment - Dictionary<Name*, PreprocessorMacro*> macros; + /// Read one token from the input stream + /// + /// At the end of the stream should return a token with `TokenType::EndOfFile`. + /// + virtual Token readToken() = 0; - ~PreprocessorEnvironment(); -}; + /// Peek at the next token in the input stream + /// + /// This function should return whatever `readToken()` will return next. + /// + /// At the end of the stream should return a token with `TokenType::EndOfFile`. + /// + virtual Token peekToken() = 0; -// Input tokens can either come from source text, or from macro expansion. -// In general, input streams can be nested, so we have to keep a conceptual -// stack of input. + // Because different implementations of this abstract base class will + // store different amounts of data, we need a virtual destructor to + // ensure that we can clean up after them.
-struct PrimaryInputStream; + /// Clean up an input stream + virtual ~InputStream() = default; -// A stream of input tokens to be consumed -struct PreprocessorInputStream -{ - // The primary input stream that is the parent to this one, - // or NULL if this stream is itself a primary stream. - PrimaryInputStream* primaryStream; + // Based on `peekToken()` we can define a few more utility functions + // for cases where we only care about certain details of the input. - // The next input stream up the stack, if any. - PreprocessorInputStream* parent; + /// Peek the type of the next token in the input stream. + TokenType peekTokenType() { return peekToken().type; } - // Environment to use when looking up macros - PreprocessorEnvironment* environment; + /// Peek the location of the next token in the input stream. + SourceLoc peekLoc() { return peekToken().loc; } - // Destructor is virtual so that we can clean up - // after concrete subtypes. - virtual ~PreprocessorInputStream() = default; -}; + /// Get the diagnostic sink to use for messages related to this stream + DiagnosticSink* getSink(); -// A "primary" input stream represents the top-level context of a file -// being parsed, and tracks things like preprocessor conditional state -struct PrimaryInputStream : PreprocessorInputStream -{ - // The next *primary* input stream up the stack - PrimaryInputStream* parentPrimaryInputStream; + InputStream* getParent() { return m_parent; } - // The deepest preprocessor conditional active for this stream. 
- PreprocessorConditional* conditional; + void setParent(InputStream* parent) { m_parent = parent; } - // The lexer state that will provide input - Lexer lexer; + MacroInvocation* getFirstBusyMacroInvocation() { return m_firstBusyMacroInvocation; } - // One token of lookahead - Token token; +protected: + /// The preprocessor that this input stream is being used by + Preprocessor* m_preprocessor = nullptr; + + /// Parent stream in the stack of secondary input streams + InputStream* m_parent = nullptr; + + /// Macro expansions that should be considered "busy" during expansion of this stream + MacroInvocation* m_firstBusyMacroInvocation = nullptr; }; -// A "secondary" input stream represents code that is being expanded -// into the current scope, but which had already been tokenized before. -// -struct PretokenizedInputStream : PreprocessorInputStream +// The simplest types of input streams are those that simply "play back" +// a list of tokens that was already captured. These types of streams +// are primarily used for playing back the tokens inside of a macro body. + + /// An input stream that reads from a list of tokens that had already been tokenized before. + /// +struct PretokenizedInputStream : InputStream { - // Reader for pre-tokenized input - TokenReader tokenReader; + typedef InputStream Super; + + /// Initialize an input stream, and associate with a specific `preprocessor` and list of `tokens` + PretokenizedInputStream(Preprocessor* preprocessor, TokenReader const& tokens) + : Super(preprocessor) + , m_tokenReader(tokens) + {} + + // A pretokenized stream implements the key read/peek operations + // by delegating to the underlying token reader.
+ + virtual Token readToken() SLANG_OVERRIDE + { + return m_tokenReader.advanceToken(); + } + + virtual Token peekToken() SLANG_OVERRIDE + { + return m_tokenReader.peekToken(); + } + +protected: + /// Initialize an input stream, and associate with a specific `preprocessor` + PretokenizedInputStream(Preprocessor* preprocessor) + : Super(preprocessor) + {} + + /// Reader for pre-tokenized input + TokenReader m_tokenReader; }; -// A pre-tokenized input stream that will only be used once, and which -// therefore owns the memory for its tokens. -struct SimpleTokenInputStream : PretokenizedInputStream +// While macro bodies are the main use case for pre-tokenized input streams, +// we also use them for a few one-off cases where the preprocessor needs to +// construct one or more tokens on the fly (e.g., when stringizing or pasting +// tokens). These streams differ in that they own the storage for the tokens +// they will play back, because they are effectively "one-shot." + + /// A pre-tokenized input stream that will only be used once, and which therefore owns the memory for its tokens. +struct SingleUseInputStream : PretokenizedInputStream { - // A list of raw tokens that will provide input - TokenList lexedTokens; + typedef PretokenizedInputStream Super; + + SingleUseInputStream(Preprocessor* preprocessor, TokenList const& lexedTokens) + : Super(preprocessor) + , m_lexedTokens(lexedTokens) + { + m_tokenReader = TokenReader(m_lexedTokens); + } + + /// A list of raw tokens that will provide input + TokenList m_lexedTokens; }; -struct MacroExpansion : PretokenizedInputStream +// During macro expansion, or the substitution of parameters into a macro body +// we end up needing to track multiple active input streams, and this is most +// easily done by having a distinct type to represent a stack of input streams.
+ + /// A stack of input streams, that will always read the next available token from the top-most stream + /// + /// An input stream stack assumes ownership of all streams pushed onto it, and will clean them + /// up when they are no longer active or when the stack gets destructed. + /// +struct InputStreamStack { - // The macro we will expand - PreprocessorMacro* macro; + InputStreamStack() + {} - /// State for marking `macro` as busy in this expansion - BusyMacro busy; + /// Clean up after an input stream stack + ~InputStreamStack() + { + popAll(); + } - // Environment for macro expansion. - // - // For a function-like macro, this will include - // the mapping from macro argument names to - // their values. - // - // For both function-like and object-like macros, - // this will include a marker that registers - // the macro as "busy" during its expansion, so - // that it won't be recursively expanded. - // - PreprocessorEnvironment expansionEnvironment; + /// Push an input stream onto the stack + void push(InputStream* stream) + { + stream->setParent(m_top); + m_top = stream; + } + + /// Pop all input streams on the stack + void popAll() + { + // We need to delete any input streams still on the stack. + // + InputStream* parent = nullptr; + for(InputStream* s = m_top; s; s = parent) + { + parent = s->getParent(); + delete s; + } + m_top = nullptr; + } + + /// Read a token from the top-most input stream with input + /// + /// If there is no input remaining, will return the EOF token + /// of the bottom-most stream. + /// + /// At least one input stream must have been `push()`ed before + /// it is valid to call this operation. + /// + Token readToken() + { + SLANG_ASSERT(m_top); + for(;;) + { + // We always try to read from the top-most stream, and if + // it is not at its end, then we return its next token. 
+ // + auto token = m_top->readToken(); + if( token.type != TokenType::EndOfFile ) + return token; + + // If the top stream has run out of input we try to + // switch to its parent, if any. + // + auto parent = m_top->getParent(); + if(parent) + { + // This stack has taken ownership of the streams, + // and must therefore delete the top stream before + // popping it. + // + delete m_top; + m_top = parent; + continue; + } + + // If the top stream did *not* have a parent (meaning + // it was also the bottom stream), then we don't try + // to pop it and instead return its EOF token as-is. + // + return token; + } + } + + /// Peek a token from the top-most input stream with input + /// + /// If there is no input remaining, will return the EOF token + /// of the bottom-most stream. + /// + /// At least one input stream must have been `push()`ed before + /// it is valid to call this operation. + /// + Token peekToken() + { + // The logic here mirrors `readToken()`, but we do not + // modify the `m_top` value or delete streams when they + // are at their end, so that we don't disrupt any state + // that might depend on which streams are present on + // the stack. + // + // Note: One might ask why we cannot just pop input + // streams that are at their end immediately. The basic + // reason has to do with determining what macros were + // "busy" when considering expanding a new one. + // Consider: + // + // #define BAD A B C BAD + // + // BAD X Y Z + // + // When expanding the invocation of `BAD`, we will eventually + // reach a point where the `BAD` in the expansion has been read + // and we are considering whether to consider it as a macro + // invocation. + // + // In this case it is clear that the Right Answer is that the + // original invocation of `BAD` is still active, and thus + // the macro is busy. 
To ensure that behavior, we want to + // be able to detect that the stream representing the + // expansion of `BAD` is still active even after we read + // the `BAD` token. + // + // TODO: Consider whether we can streamline the implementaiton + // an remove this wrinkle. + // + auto top = m_top; + for(;;) + { + SLANG_ASSERT(top); + auto token = top->peekToken(); + if( token.type != TokenType::EndOfFile ) + return token; + + auto parent = top->getParent(); + if(parent) + { + top = parent; + continue; + } + + return token; + } + } + + /// Return type of the token that `peekToken()` will return + TokenType peekTokenType() + { + return peekToken().type; + } + + /// Return location of the token that `peekToken()` will return + SourceLoc peekLoc() + { + return peekToken().loc; + } + + /// Skip over all whitespace tokens in the input stream(s) to arrive at the next non-whitespace token + void skipAllWhitespace() + { + for( ;;) + { + switch(peekTokenType()) + { + default: + return; + + // Note: We expect `NewLine` to be the only case of whitespace we + // encounter right now, because all the other cases will have been + // filtered out by the `LexerInputStream`. 
+ // + case TokenType::NewLine: + case TokenType::WhiteSpace: + case TokenType::BlockComment: + case TokenType::LineComment: + readToken(); + break; + } + } + } + + /// Get the top stream of the input stack + InputStream* getTopStream() + { + return m_top; + } + + /// Get the input stream that the next token would come from + InputStream* getNextStream() + { + SLANG_ASSERT(m_top); + auto top = m_top; + for(;;) + { + auto token = top->peekToken(); + if( token.type != TokenType::EndOfFile ) + return top; + + auto parent = top->getParent(); + if(parent) + { + top = parent; + continue; + } + + return top; + } + } + +private: + /// The top of the stack of input streams + InputStream* m_top = nullptr; }; -// An enumeration for the different types of macros -enum class PreprocessorMacroFlavor +// Another (relatively) simple case of an input stream is one that reads +// tokens directly from the lexer. +// +// It might seem like we could simplify things even further by always lexing +// a file into tokens first, and then using the earlier input-stream cases +// for pre-tokenized input. The main reason we don't use that strategy is +// that when dealing with preprocessor conditionals we will often want to +// suppress diagnostic messages coming from the lexer when inside of disabled +// conditional branches. +// +// TODO: We might be able to simplify the logic here by having the lexer buffer +// up the issues it diagnoses along with a list of tokens, rather than diagnose +// them directly, and then have the preprocessor or later compilation stages +// take responsibility for actually emitting those diagnostics. 
+ + /// An input stream that reads tokens directly using the Slang `Lexer` +struct LexerInputStream : InputStream { - ObjectLike, - FunctionArg, - FunctionLike, - BuiltinLine, /// builtin macro __LINE__ - BuiltinFile, /// builtin macro __FILE__ + typedef InputStream Super; + + LexerInputStream( + Preprocessor* preprocessor, + SourceView* sourceView); + + Lexer* getLexer() { return &m_lexer; } + + // A common thread to many of the input stream implementations is to + // use a single token of lookahead in order to suppor the `peekToken()` + // operation with both simplicity and efficiency. + + Token readToken() SLANG_OVERRIDE + { + auto result = m_lookaheadToken; + m_lookaheadToken = _readTokenImpl(); + return result; + } + + Token peekToken() SLANG_OVERRIDE + { + return m_lookaheadToken; + } + +private: + /// Read a token from the lexer, bypassing lookahead + Token _readTokenImpl() + { + for(;;) + { + Token token = m_lexer.lexToken(); + switch(token.type) + { + default: + return token; + + case TokenType::WhiteSpace: + case TokenType::BlockComment: + case TokenType::LineComment: + break; + } + } + } + + /// The lexer state that will provide input + Lexer m_lexer; + + /// One token of lookahead + Token m_lookaheadToken; }; -SLANG_FORCE_INLINE bool isBuiltinMacro(PreprocessorMacroFlavor flavor) -{ - return flavor == PreprocessorMacroFlavor::BuiltinLine || flavor == PreprocessorMacroFlavor::BuiltinFile; -} +// The remaining input stream cases deal with macro expansion, so it is +// probalby a good idea to discuss how macros are represented by the +// preprocessor as a first step. +// +// Note that there is an important distinction between a macro *definition* +// and a macro *invocation*, similar to how we distinguish a function definition +// from a call to that function. 
-// In the current design (which we may want to re-consider), -// a macro is a specialized flavor of input stream, that -// captures the token list in its expansion, and then -// can be "played back." -struct PreprocessorMacro + + /// A definition of a macro +struct MacroDefinition { - // The flavor of macro - PreprocessorMacroFlavor flavor; + /// The "flavor" / type / kind of a macro definition + enum class Flavor + { + /// A function-like macro (e.g., `#define INC(x) (x)++`) + FunctionLike, - // The name under which the macro was `#define`d - NameLoc nameAndLoc; + /// An user-defiend object-like macro (e.g., `#define N 100`) + ObjectLike, - // Parameters of the macro, in case of a function-like macro - List<NameLoc> params; + /// An object-like macro that is built in to the copmiler (e.g., `__LINE__`) + BuiltinObjectLike, + }; - // The tokens that make up the macro body + // The body of a macro definition is input as a stream of tokens, but + // when "playing back" a macro it is helpful to process those tokens + // into a form where a lot of the semantic questions have been answered. + // + // We will chop up the tokens that macro up a macro definition/body into + // distinct *ops* where each op has an *opcode* that defines how that + // token or range of tokens behaves. 
+ + /// Opcode for an `Op` in a macro definition + enum class Opcode + { + /// A raw span of tokens from the macro body (no subsitution needed) + /// + /// The `index0` and `index1` fields form a begin/end pair of tokens + RawSpan, + + /// A parameter of the macro, which should have expansion applied to it + /// + /// The `index0` opcode is the index of the token that named the parameter + /// The `index1` field is the zero-based index of the chosen parameter + ExpandedParam, + + /// A parameter of the macro, which should *not* have expansion applied to it + /// + /// The `index0` opcode is the index of the token that named the parameter + /// The `index1` field is the zero-based index of the chosen parameter + UnexpandedParam, + + /// A parameter of the macro, stringized (and not expanded) + /// + /// The `index0` opcode is the index of the token that named the parameter + /// The `index1` field is the zero-based index of the chosen parameter + StringizedParam, + + /// A paste of the last token of the preceding op and the first token of the next + /// + /// The `index0` opcode is the index of the `##` token + TokenPaste, + + /// builtin expansion behavior for `__LINE__` + BuiltinLine, + + /// builtin expansion behavior for `__FILE__` + BuiltinFile, + }; + + /// A single op in the definition of the macro + struct Op + { + /// The opcode that defines how to interpret this op + Opcode opcode = Opcode::RawSpan; + + /// Two operands, with interpretation depending on the `opcode` + Index index0 = 0; + Index index1 = 0; + }; + + /// The flavor of macro + MacroDefinition::Flavor flavor; + + /// The name under which the macro was `#define`d + NameLoc nameAndLoc; + + /// The tokens that make up the macro body TokenList tokens; - // The environment in which this macro needs to be expanded. - // For ordinary macros this will be the global environment, - // while for function-like macro arguments, it will be - // the environment of the macro invocation. 
- PreprocessorEnvironment* environment; + /// List ops that describe how this macro expands + List<Op> ops; + + /// Parameters of the macro, in case of a function-like macro + List<NameLoc> params; - // Name* getName() { return nameAndLoc.name; @@ -202,541 +652,465 @@ struct PreprocessorMacro { return nameAndLoc.loc; } -}; -// State of the preprocessor -struct Preprocessor -{ - // diagnostics sink to use when writing messages - DiagnosticSink* sink; + bool isBuiltin() + { + return flavor == MacroDefinition::Flavor::BuiltinObjectLike; + } +}; - // Functionality for looking up files in a `#include` directive - IncludeSystem* includeSystem; +// When a macro is invoked, we conceptually want to "play back" the ops +// that make up the macro's definition. The `MacroInvocation` type logically +// represents an invocation of a macro and handles the complexities of +// playing back its definition with things like argument substiution. - // Current input stream (top of the stack of input) - PreprocessorInputStream* inputStream; + /// An invocation/call of a macro, which can provide tokens of its expansion +struct MacroInvocation : InputStream +{ + typedef InputStream Super; - // Currently-defined macros - PreprocessorEnvironment globalEnv; + /// Create a new expansion of `macro` + MacroInvocation( + Preprocessor* preprocessor, + MacroDefinition* macro, + MacroInvocation* nextBusyMacroInvocation, + SourceLoc macroInvocationLoc, + SourceLoc initiatingMacroInvocationLoc); - // A pre-allocated token that can be returned to - // represent end-of-input situations. - Token endOfFileToken; + /// Prime the input stream + /// + /// This operation *must* be called before the first `readToken()` or `peekToken()` + void prime(); - /// Callback handlers - PreprocessorHandler* handler = nullptr; + // The `readToken()` and `peekToken()` operations for a macro invocation + // will be implemented by using one token of lookahead, which makes the + // operations relatively simple. 
- // The unique identities of any paths that have issued `#pragma once` directives to - // stop them from being included again. - HashSet<String> pragmaOnceUniqueIdentities; + virtual Token readToken() SLANG_OVERRIDE + { + Token result = m_lookaheadToken; + m_lookaheadToken = _readTokenImpl(); + return result; + } - /// Name pool to use when creating `Name`s from strings - NamePool* namePool = nullptr; + virtual Token peekToken() SLANG_OVERRIDE + { + return m_lookaheadToken; + } - /// File system to use when looking up files - ISlangFileSystemExt* fileSystem = nullptr; + /// Is the given `macro` considered "busy" during the given macroinvocation? + static bool isBusy(MacroDefinition* macro, MacroInvocation* duringMacroInvocation); - /// Source manager to use when loading source files - SourceManager* sourceManager = nullptr; +private: + // Macro invocations are created as part of applying macro expansion + // to a stream, so the `ExpansionInputStream` type takes responsibility + // for setting up much of the state of a `MacroInvocation`. + // + friend struct ExpansionInputStream; - /// Stores the initiating macro source location. - SourceLoc initiatingMacroSourceLoc; + /// The macro being expanded + MacroDefinition* m_macro; - NamePool* getNamePool() { return namePool; } - SourceManager* getSourceManager() { return sourceManager; } -}; + /// A single argument to the macro invocation + /// + /// Each argument is represented as a begin/end pair of indices + /// into the sequence of tokens that make up the macro arguments. 
+ /// + struct Arg + { + Index beginTokenIndex = 0; + Index endTokenIndex = 0; + }; + /// Tokens that make up the macro arguments, in case of function-like macro expansion + List<Token> m_argTokens; + /// Arguments to the macro, in the case of a function-like macro expansion + List<Arg> m_args; -static Token AdvanceToken(Preprocessor* preprocessor); + /// Additional macros that should be considered "busy" during this expansion + MacroInvocation* m_nextBusyMacroInvocation; -// Convenience routine to access the diagnostic sink -static DiagnosticSink* GetSink(Preprocessor* preprocessor) -{ - return preprocessor->sink; -} + /// Locatin of the macro invocation that led to this expansion + SourceLoc m_macroInvocationLoc; -// -// Forward declarations -// + /// Location of the "iniating" macro invocation in cases where multiple + /// nested macro invocations might be in flight. + SourceLoc m_initiatingMacroInvocationLoc; -static void DestroyConditional(PreprocessorConditional* conditional); -static void DestroyMacro(Preprocessor* preprocessor, PreprocessorMacro* macro); -static bool IsSkipping(Preprocessor* preprocessor); + /// One token of lookahead + Token m_lookaheadToken; -// -// Basic Input Handling -// + /// Actually read a new token (not just using the lookahead) + Token _readTokenImpl(); -// Create a fresh input stream -static void initializeInputStream(Preprocessor* preprocessor, PreprocessorInputStream* inputStream) -{ - inputStream->parent = NULL; - inputStream->environment = &preprocessor->globalEnv; -} + // In order to play back a macro definition, we will play back the ops + // in its body one at a time. Each op may expand to a stream of zero or + // more tokens, so we need some state to track all of that. 
-static void initializePrimaryInputStream(Preprocessor* preprocessor, PrimaryInputStream* inputStream) -{ - initializeInputStream(preprocessor, inputStream); - inputStream->primaryStream = inputStream; - inputStream->conditional = NULL; -} + /// One or more input streams representing the current "op" being expanded + InputStreamStack m_currentOpStreams; -// Destroy an input stream -static void destroyInputStream(Preprocessor* /*preprocessor*/, PreprocessorInputStream* inputStream) -{ - delete inputStream; -} + /// The index into the macro's list of the current operation being played back + Index m_macroOpIndex = 0; -// Create an input stream to represent a pre-tokenized input file. -// TODO(tfoley): pre-tokenizing files isn't going to work in the long run. -static PreprocessorInputStream* CreateInputStreamForSource( - Preprocessor* preprocessor, - SourceView* sourceView) -{ - MemoryArena* memoryArena = sourceView->getSourceManager()->getMemoryArena(); + /// Initialize the input stream for the current macro op + void _initCurrentOpStream(); - PrimaryInputStream* inputStream = new PrimaryInputStream(); - initializePrimaryInputStream(preprocessor, inputStream); + /// Push a stream onto `m_currentOpStreams` that consists of a single token + void _pushSingleTokenStream(TokenType tokenType, SourceLoc tokenLoc, UnownedStringSlice const& content); - // initialize the embedded lexer so that it can generate a token stream - inputStream->lexer.initialize(sourceView, GetSink(preprocessor), preprocessor->getNamePool(), memoryArena); - inputStream->token = inputStream->lexer.lexToken(); + /// Push a stream for a source-location builtin (`__FILE__` or `__LINE__`), with content set up by `valueBuilder` + template<typename F> + void _pushStreamForSourceLocBuiltin(TokenType tokenType, F const& valueBuilder); +}; - return inputStream; -} +// Playing back macro bodies for macro invocations is one part of the expansion process, and the other +// is scanning through a token stream and 
identifying macro invocations that need to be expanded. +// Rather than have one stream type try to handle both parts of the process, we use a distinct type +// to handle scanning for macro invocations. +// +// By using two distinct stream types we are able to handle intriciate details of the C/C++ preprocessor +// like how the argument tokens to a macro are expanded before they are subsituted into the body, and then +// are subject to another round of macro expansion *after* substitution. -static PrimaryInputStream* asPrimaryInputStream(PreprocessorInputStream* inputStream) + /// An input stream that applies macro expansion to another stream +struct ExpansionInputStream : InputStream { - auto primaryStream = inputStream->primaryStream; - if(primaryStream == inputStream) - return primaryStream; - return nullptr; -} - + typedef InputStream Super; -static void PushInputStream(Preprocessor* preprocessor, PreprocessorInputStream* inputStream) -{ - inputStream->parent = preprocessor->inputStream; - if(!asPrimaryInputStream(inputStream)) - inputStream->primaryStream = preprocessor->inputStream->primaryStream; - preprocessor->inputStream = inputStream; -} + /// Construct an input stream that applies macro expansion to `base` + ExpansionInputStream( + Preprocessor* preprocessor, + InputStream* base) + : Super(preprocessor) + , m_base(base) + { + m_inputStreams.push(base); + m_lookaheadToken = _readTokenImpl(); + } -// Called when we reach the end of an input stream. -// Performs some validation and then destroys the input stream if required. 
-static void EndInputStream(Preprocessor* preprocessor, PreprocessorInputStream* inputStream) -{ - if(auto primaryStream = asPrimaryInputStream(inputStream)) + Token readToken() SLANG_OVERRIDE { - // If there are any conditionals that weren't completed, then it is an error - if (primaryStream->conditional) - { - PreprocessorConditional* conditional = primaryStream->conditional; + // Reading a token from an expansion strema amounts to checking + // whether the current state of the input stream marks the start + // of a macro invocation (in which case we push the resulting + // invocation onto the input stack), and then reading a token + // from whatever stream is on top of the stack. - GetSink(preprocessor)->diagnose(conditional->ifToken.loc, Diagnostics::endOfFileInPreprocessorConditional); + _maybeBeginMacroInvocation(); - while (conditional) - { - PreprocessorConditional* parent = conditional->parent; - DestroyConditional(conditional); - conditional = parent; - } - } + Token result = m_lookaheadToken; + m_lookaheadToken = _readTokenImpl(); + return result; } - destroyInputStream(preprocessor, inputStream); -} + Token peekToken() SLANG_OVERRIDE + { + _maybeBeginMacroInvocation(); + return m_lookaheadToken; + } -// Consume one token from an input stream -static Token AdvanceRawToken(PreprocessorInputStream* inputStream, LexerFlags lexerFlags = 0) -{ - if( auto primaryStream = asPrimaryInputStream(inputStream) ) + // The "raw" read operations on an expansion input strema bypass + // macro expansion and just read whatever token is next in the + // input. These are useful for the top-level input stream of + // a file, since we often want to read unexpanded tokens for + // preprocessor directives. 
+ + Token readRawToken() { - auto result = primaryStream->token; - primaryStream->token = primaryStream->lexer.lexToken(lexerFlags); + Token result = m_lookaheadToken; + m_lookaheadToken = _readTokenImpl(); return result; } - else + + Token peekRawToken() { - PretokenizedInputStream* pretokenized = dynamic_cast<PretokenizedInputStream*>(inputStream); - SLANG_ASSERT(pretokenized); - return pretokenized->tokenReader.advanceToken(); + return m_lookaheadToken; } -} -// Peek one token from an input stream -static Token PeekRawToken(PreprocessorInputStream* inputStream) -{ - if( auto primaryStream = asPrimaryInputStream(inputStream) ) + TokenType peekRawTokenType() { return peekRawToken().type; } + +private: + /// The base stream that macro expansion is being applied to + InputStream* m_base = nullptr; + + /// A stack of the base stream and active macro invocation in flight + InputStreamStack m_inputStreams; + + /// Location of the "iniating" macro invocation in cases where multiple + /// nested macro invocations might be in flight. + SourceLoc m_initiatingMacroInvocationLoc; + + /// One token of lookahead + Token m_lookaheadToken; + + /// Read a token, bypassing lookahead + Token _readTokenImpl() { - return primaryStream->token; + Token token = m_inputStreams.readToken(); + return token; } - else + + /// Look at current input state and decide whether it represents a macro invocation + void _maybeBeginMacroInvocation(); + + /// Parse one argument to a macro invocation + MacroInvocation::Arg _parseMacroArg(MacroInvocation* macroInvocation); + + /// Parse all arguments to a macro invocation + Index _parseMacroArgs( + MacroDefinition* macro, + MacroInvocation* macroInvocation); + + /// Push the given macro invocation into the stack of input streams + void _pushMacroInvocation( + MacroInvocation* macroInvocation); +}; + +// The top-level flow of the preprocessor is that it processed *input files* +// that contain both directives and ordinary tokens. 
+// +// Input files are a bit like token streams, but they don't fit neatly into +// the same abstraction due to all the special-case handling that directives +// and conditionals require. + + /// An input file being processed by the preprocessor. + /// + /// An input file manages both the expansion of lexed tokens + /// from the source file, and also state related to preprocessor + /// directives, including skipping of code due to `#if`, etc. + /// +struct InputFile +{ + InputFile( + Preprocessor* preprocessor, + SourceView* sourceView); + + ~InputFile(); + + /// Is this input file skipping tokens (because the current location is inside a disabled condition)? + bool isSkipping(); + + /// Get the inner-most conditional that is in efffect at the current location + Conditional* getInnerMostConditional() { return m_conditional; } + + /// Push a new conditional onto the stack of conditionals in effect + void pushConditional(Conditional* conditional) { - PretokenizedInputStream* pretokenized = dynamic_cast<PretokenizedInputStream*>(inputStream); - SLANG_ASSERT(pretokenized); - return pretokenized->tokenReader.peekToken(); + conditional->parent = m_conditional; + m_conditional = conditional; } -} -// Peek one token type from an input stream -static TokenType PeekRawTokenType(PreprocessorInputStream* inputStream) -{ - if( auto primaryStream = asPrimaryInputStream(inputStream) ) + /// Pop the inner-most conditional + void popConditional() { - return primaryStream->token.type; + auto conditional = m_conditional; + SLANG_ASSERT(conditional); + m_conditional = conditional->parent; + delete conditional; } - else + + /// Read one token using all the expansion and directive-handling logic + Token readToken() { - PretokenizedInputStream* pretokenized = dynamic_cast<PretokenizedInputStream*>(inputStream); - SLANG_ASSERT(pretokenized); - return pretokenized->tokenReader.peekTokenType(); + return m_expansionStream->readToken(); } -} + Lexer* getLexer() { return 
m_lexerStream->getLexer(); } -// Read one token in "raw" mode (meaning don't expand macros) -static Token AdvanceRawToken(Preprocessor* preprocessor, LexerFlags lexerFlags = 0) -{ - for(;;) - { - // Look at the input stream on top of the stack - PreprocessorInputStream* inputStream = preprocessor->inputStream; + ExpansionInputStream* getExpansionStream() { return m_expansionStream; } - // If there isn't one, then there is no more input left to read. - if(!inputStream) - { - return preprocessor->endOfFileToken; - } +private: + friend struct Preprocessor; - // The top-most input stream may be at its end - if(PeekRawTokenType(inputStream) == TokenType::EndOfFile) - { - // If there is another stream remaining, switch to it - if(inputStream->parent) - { - preprocessor->inputStream = inputStream->parent; - EndInputStream(preprocessor, inputStream); - continue; - } - } + /// The parent preprocessor + Preprocessor* m_preprocessor = nullptr; - // Everything worked, so read a token from the top-most stream - return AdvanceRawToken( - inputStream, - lexerFlags | (IsSkipping(preprocessor) ? kLexerFlag_IgnoreInvalid : 0)); - } -} + /// The next outer input file + /// + /// E.g., if this file was `#include`d from another file, then `m_parent` would be + /// the file with the `#include` directive. + /// + InputFile* m_parent = nullptr; -// Return the next token in "raw" mode, but don't advance the -// current token state. -static Token PeekRawToken(Preprocessor* preprocessor) -{ - // We need to find the stream that `advanceRawToken` would read from. - PreprocessorInputStream* inputStream = preprocessor->inputStream; - for (;;) - { - if (!inputStream) - { - // No more input streams left to read - return preprocessor->endOfFileToken; - } + /// The inner-most preprocessor conditional active for this file. 
+ Conditional* m_conditional = nullptr; - // The top-most input stream may be at its end, so - // look one entry up the stack (don't actually pop - // here, since we are just peeking) - if (PeekRawTokenType(inputStream) == TokenType::EndOfFile) - { - if (inputStream->parent) - { - inputStream = inputStream->parent; - continue; - } - } + /// The lexer input stream that unexpanded tokens will be read from + LexerInputStream* m_lexerStream; - // Everything worked, so the token we just peeked is fine. - return PeekRawToken(inputStream); - } -} + /// An input stream that applies macro expansion to `m_lexerStream` + ExpansionInputStream* m_expansionStream; +}; -// Get the location of the current (raw) token -static SourceLoc PeekLoc(Preprocessor* preprocessor) + /// State of the preprocessor +struct Preprocessor { - return PeekRawToken(preprocessor).loc; -} + /// Diagnostics sink to use when writing messages + DiagnosticSink* sink = nullptr; -// Get the `TokenType` of the current (raw) token -static TokenType PeekRawTokenType(Preprocessor* preprocessor) -{ - return PeekRawToken(preprocessor).type; -} + /// Functionality for looking up files in a `#include` directive + IncludeSystem* includeSystem = nullptr; -// -// Macros -// + /// A stack of "active" input files + InputFile* m_currentInputFile = nullptr; -// Create a macro -static PreprocessorMacro* CreateMacro(Preprocessor* preprocessor) -{ - // TODO(tfoley): Allocate these more intelligently. - // For example, consider pooling them on the preprocessor. + // TODO: We could split the macro environment into a `globalEnv` + // and a `superGlobalEnv` such that built-in macros like `__FILE__` + // and `__LINE__` are defined in the super-global environment so + // that they can be shadowed by user-defined macros but will again + // be available after an `#undef`. 
- PreprocessorMacro* macro = new PreprocessorMacro(); - macro->flavor = PreprocessorMacroFlavor::ObjectLike; - macro->environment = &preprocessor->globalEnv; - return macro; -} + /// Currently-defined macros + Environment globalEnv; -// Destroy a macro -static void DestroyMacro(Preprocessor* /*preprocessor*/, PreprocessorMacro* macro) -{ - delete macro; -} + /// A pre-allocated token that can be returned to represent end-of-input situations. + Token endOfFileToken; + /// Callback handlers + PreprocessorHandler* handler = nullptr; -// Find the currently-defined macro of the given name, or return NULL -static PreprocessorMacro* LookupMacro(PreprocessorEnvironment* environment, Name* name) -{ - for(PreprocessorEnvironment* e = environment; e; e = e->parent) - { - PreprocessorMacro* macro = NULL; - if (e->macros.TryGetValue(name, macro)) - return macro; - } + /// The unique identities of any paths that have issued `#pragma once` directives to + /// stop them from being included again. + HashSet<String> pragmaOnceUniqueIdentities; - return NULL; -} + /// Name pool to use when creating `Name`s from strings + NamePool* namePool = nullptr; + /// File system to use when looking up files + ISlangFileSystemExt* fileSystem = nullptr; -static PreprocessorEnvironment* GetCurrentEnvironment(Preprocessor* preprocessor) -{ - // The environment we will use for looking up a macro is associated - // with the current input stream (because it may include entries - // for macro arguments). - // - // We need to be careful, though, when we are at the end of an - // input stream (e.g., representing one argument), so that we - // don't use its environment. + /// Source manager to use when loading source files + SourceManager* sourceManager = nullptr; - PreprocessorInputStream* inputStream = preprocessor->inputStream; + /// Stores the initiating macro source location. 
+ SourceLoc initiatingMacroSourceLoc; - for(;;) - { - // If there is no input stream that isn't at its end, - // then fall back to the global environment. - if (!inputStream) - return &preprocessor->globalEnv; + NamePool* getNamePool() { return namePool; } + SourceManager* getSourceManager() { return sourceManager; } - // If the current input stream is at its end, then - // fall back to its parent stream. - if (PeekRawTokenType(inputStream) == TokenType::EndOfFile) - { - inputStream = inputStream->parent; - continue; - } + /// Push a new input file onto the input stack of the preprocessor + void pushInputFile(InputFile* inputFile); + + /// Pop the inner-most input file from the stack of input files + void popInputFile(); +}; - // If we've found an active stream that isn't at its end, - // then use that for lookup. - return inputStream->environment; - } -} -static bool _isInMacroExpansion(Preprocessor* preprocessor) -{ - return preprocessor->inputStream->environment->busyMacros != nullptr; -} +//static Token AdvanceToken(Preprocessor* preprocessor); -static PreprocessorMacro* LookupMacro(Preprocessor* preprocessor, Name* name) +// Convenience routine to access the diagnostic sink +static DiagnosticSink* GetSink(Preprocessor* preprocessor) { - return LookupMacro(GetCurrentEnvironment(preprocessor), name); + return preprocessor->sink; } - /// Check if `macro` is "busy" in the given `env`. - /// - /// A macro is "busy" if it is already being used for expansion, such - /// that an attempt to expand it again would lead to infinite expansion. - /// -static bool _isMacroBusy(PreprocessorMacro* macro, PreprocessorEnvironment* env) +DiagnosticSink* InputStream::getSink() { - // The challenge here is that we are implementing expansion - // for arguments to function-like macros in a "lazy" fashion. - // - // The letter of the spec is that we should macro expand - // each argument *before* substitution, and then go and - // macro-expand the substituted body. 
This means that we - // can invoke a macro as part of an argument to an - // invocation of the same macro: - // - // #define FOO(A,B,C) A + B + C - // - // FOO( 1, FOO(22, 2, 2), 333 ); - // - // In our implementation, the "inner" invocation of `FOO` - // gets expanded at the point where it gets referenced - // in the body of the "outer" invocation of `FOO`. - // Doing things this way leads to greatly simplified - // code for handling expansion. - // - // We solve this problem by having each `PreprocessorEnvironment` - // track an (optional) macro that should be busy in - // that environment. - // - // The environment that we create for the outer expansion - // of `FOO` (the one that will map `A => 1, B => FOO(22,2,2), ...`) - // will track the `FOO` macro because it should be busy - // in the body of `FOO`. - // - // In contrast, the environment used when expanding the parameter - // `B` will just be the environment in place at the macro *invocation* - // site, which in this case is the global environment. - // - // Given the design of putting busy macro state into environments, - // we can easily check if a macro is busy in a given environment - // by walking through the list of busy macros that was registerd - // with that environment. 
- // - for(auto busyMacro = env->busyMacros; busyMacro; busyMacro = busyMacro->next) - { - if(busyMacro->macro == macro) - return true; - } - return false; + return GetSink(m_preprocessor); } // -// Reading Tokens With Expansion +// Basic Input Handling // -static void initializeMacroExpansion( - Preprocessor* preprocessor, - MacroExpansion* expansion, - PreprocessorMacro* macro) +LexerInputStream::LexerInputStream( + Preprocessor* preprocessor, + SourceView* sourceView) + : Super(preprocessor) { - initializeInputStream(preprocessor, expansion); + MemoryArena* memoryArena = sourceView->getSourceManager()->getMemoryArena(); + m_lexer.initialize(sourceView, GetSink(preprocessor), preprocessor->getNamePool(), memoryArena); + m_lookaheadToken = _readTokenImpl(); +} - expansion->parent = preprocessor->inputStream; - expansion->primaryStream = preprocessor->inputStream->primaryStream; - expansion->macro = macro; +InputFile::InputFile( + Preprocessor* preprocessor, + SourceView* sourceView) +{ + m_preprocessor = preprocessor; - // The macro expansion will read from the stored tokens - // that were recorded in the macro definition. - // - expansion->tokenReader = TokenReader(macro->tokens); + m_lexerStream = new LexerInputStream(preprocessor, sourceView); + m_expansionStream = new ExpansionInputStream(preprocessor, m_lexerStream); +} - // A macro expansion will always occur in its own - // environment. - // - // For a function-like macro this environment will - // map the names of macro parameters to their argument - // token lists. +InputFile::~InputFile() +{ + // We start by deleting any remaining conditionals on the conditional stack. // - // For all macros, this environment will be used - // to track the "busy" state of the macro itself. + // Note: This should only come up in the case where a conditional was not + // terminated before the end of the file. 
// - expansion->environment = &expansion->expansionEnvironment; + Conditional* parentConditional = nullptr; + for(auto conditional = m_conditional; conditional; conditional = parentConditional) + { + parentConditional = conditional->parent; + delete conditional; + } - // The environment used for expanding a macro is always - // a child of the environment where the macro was defined. + // Note: We only delete the expansion strema here because the lexer + // stream is being used as the "base" stream of the expansion stream, + // and the expansion stream takes responsibility for deleting it. // - PreprocessorEnvironment* parentEnvironment = macro->environment; - expansion->expansionEnvironment.parent = parentEnvironment; - // - // For ordinary function-like and object-like macros, that - // environment will always be the global environment. - // - // For the macros that represent arguments to a function-like - // macro, that environment will be the environment where - // the function-like macro was *invoked*, which might be - // in the context of another macro expansion. + delete m_expansionStream; } -static void pushMacroExpansion( - Preprocessor* preprocessor, - MacroExpansion* expansion, - SourceLoc initiatingMacroSourceLoc) +// +// Macros +// + + +// Find the currently-defined macro of the given name, or return NULL +static MacroDefinition* LookupMacro(Environment* environment, Name* name) { - // Only set the initiating if outside of a macro expansion - if (!_isInMacroExpansion(preprocessor)) + for(Environment* e = environment; e; e = e->parent) { - preprocessor->initiatingMacroSourceLoc = initiatingMacroSourceLoc; + MacroDefinition* macro = NULL; + if (e->macros.TryGetValue(name, macro)) + return macro; } - // Before pushing a macro as an input stream, - // we need to set the appropraite "busy" state - // that will be used during expansions of that - // macro's definition. 
- - // A macro is always busy in its own expansion environment, - // to prevent recursive expansion. Here we construct a - // link for the linked list of busy macros and install it - // into the environment. - // - // Note: this extra link is unnecessary in the case where - // `macro` is an argument to a function-like macro, because - // there is no way for it to reference itself in its - // expansion. We could try to avoid the extra step at - // the cost of a bit more code complexity here. - // - auto macro = expansion->macro; - expansion->busy.macro = macro; - expansion->expansionEnvironment.busyMacros = &expansion->busy; + return NULL; +} - // What goes into the rest of the list of busy macros - // depends on what kind of macro is being expanded. - // - if( macro->flavor == PreprocessorMacroFlavor::FunctionArg ) - { - // For a macro representing an argument to a function-like - // macro, the busy macros should be those that were in - // place at the invocation site of the function-like macro. - // This happens to be what is stored in the parent - // environment. - // - auto parentEnvironment = expansion->expansionEnvironment.parent; - expansion->busy.next = parentEnvironment->busyMacros; - } - else +bool MacroInvocation::isBusy(MacroDefinition* macro, MacroInvocation* duringMacroInvocation) +{ + for(auto busyMacroInvocation = duringMacroInvocation; busyMacroInvocation; busyMacroInvocation = busyMacroInvocation->m_nextBusyMacroInvocation ) { - // For the other cases (function-like and object-like - // macros), the busy list should include anything - // that was already busy in the environment that - // is beginning to expand a macro. 
- // - expansion->busy.next = preprocessor->inputStream->environment->busyMacros; + if(busyMacroInvocation->m_macro == macro) + return true; } - - PushInputStream(preprocessor, expansion); + return false; } -static void _addEndOfStreamToken( +MacroInvocation::MacroInvocation( Preprocessor* preprocessor, - PreprocessorMacro* macro) + MacroDefinition* macro, + MacroInvocation* nextBusyMacroInvocation, + SourceLoc macroInvocationLoc, + SourceLoc initiatingMacroInvocationLoc) + : Super(preprocessor) { - Token token = PeekRawToken(preprocessor); - token.type = TokenType::EndOfFile; - macro->tokens.add(token); + m_macro = macro; + m_nextBusyMacroInvocation = nextBusyMacroInvocation; + m_firstBusyMacroInvocation = this; + m_macroInvocationLoc = macroInvocationLoc; + m_initiatingMacroInvocationLoc = initiatingMacroInvocationLoc; } -static SimpleTokenInputStream* createSimpleInputStream( - Preprocessor* preprocessor, - Token const& token) +void MacroInvocation::prime() { - SimpleTokenInputStream* inputStream = new SimpleTokenInputStream(); - initializeInputStream(preprocessor, inputStream); - - inputStream->lexedTokens.add(token); - - Token eofToken; - eofToken.type = TokenType::EndOfFile; - eofToken.loc = token.loc; - eofToken.flags = TokenFlag::AfterWhitespace | TokenFlag::AtStartOfLine; - inputStream->lexedTokens.add(eofToken); - - inputStream->tokenReader = TokenReader(inputStream->lexedTokens); + _initCurrentOpStream(); + m_lookaheadToken = _readTokenImpl(); +} - return inputStream; +void ExpansionInputStream::_pushMacroInvocation( + MacroInvocation* expansion) +{ + m_inputStreams.push(expansion); + m_lookaheadToken = m_inputStreams.readToken(); } /// Parse one macro argument and return it in the form of a macro @@ -746,13 +1120,12 @@ static SimpleTokenInputStream* createSimpleInputStream( /// /// Does not consume any closing `)` or `,` for the argument. 
/// -static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) +MacroInvocation::Arg ExpansionInputStream::_parseMacroArg(MacroInvocation* macroInvocation) { // Create the argument, represented as a special flavor of macro // - PreprocessorMacro* arg = CreateMacro(preprocessor); - arg->flavor = PreprocessorMacroFlavor::FunctionArg; - arg->environment = GetCurrentEnvironment(preprocessor); + MacroInvocation::Arg arg; + arg.beginTokenIndex = macroInvocation->m_argTokens.getCount(); // We will now read the tokens that make up the argument. // @@ -763,7 +1136,13 @@ static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) int nestingDepth = 0; for(;;) { - switch(PeekRawTokenType(preprocessor)) + arg.endTokenIndex = macroInvocation->m_argTokens.getCount(); + + m_inputStreams.skipAllWhitespace(); + Token token = m_inputStreams.peekToken(); + macroInvocation->m_argTokens.add(token); + + switch(token.type) { case TokenType::EndOfFile: // End of input means end of the argument. @@ -777,7 +1156,6 @@ static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) // if(nestingDepth == 0) { - _addEndOfStreamToken(preprocessor, arg); return arg; } // Otherwise we decrease our nesting depth, add @@ -790,7 +1168,6 @@ static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) // then we are at the end of an argument if (nestingDepth == 0) { - _addEndOfStreamToken(preprocessor, arg); return arg; } // Otherwise we add it as a normal token @@ -807,8 +1184,8 @@ static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) } // Add the token and continue parsing. - arg->tokens.add(AdvanceRawToken(preprocessor)); - } + m_inputStreams.readToken(); + } } /// Parse the arguments to a function-like macro invocation. @@ -818,29 +1195,47 @@ static PreprocessorMacro* _parseMacroArg(Preprocessor* preprocessor) /// /// Returns the number of arguments parsed. 
/// -static Index _parseMacroArgs( - Preprocessor* preprocessor, - PreprocessorMacro* macro, - MacroExpansion* expansion) +Index ExpansionInputStream::_parseMacroArgs( + MacroDefinition* macro, + MacroInvocation* expansion) { - // If there are no arguments present, then we - // will bail out immediately. + // There is a subtle case here, which is when a macro expects + // exactly one parameter, but the argument list is empty. E.g.: + // + // #define M(x) /* whatever */ + // + // M() + // + // In this case we should parse a single (empty) argument, rather + // than issue an error because of there apparently being zero + // arguments. + // + // In all other cases (macros that do not have exactly one + // parameter) we should treat an empty argument list as zero + // arguments for the purposes of error messages (since that is + // how a programmer is likely to view/understand it). // - switch (PeekRawTokenType(preprocessor)) + Index paramCount = Index(macro->params.getCount()); + if(paramCount != 1) { - case TokenType::RParent: - case TokenType::EndOfFile: - return 0; + // If there appear to be no arguments because the next + // token would close the argument list, then we bail + // out immediately. + // + switch (m_inputStreams.peekTokenType()) + { + case TokenType::RParent: + case TokenType::EndOfFile: + return 0; + } } // Otherwise, we have one or more arguments. - Index paramCount = Index(macro->params.getCount()); Index argCount = 0; for(;;) { // Parse an argument. - PreprocessorMacro* arg = _parseMacroArg(preprocessor); - SLANG_ASSERT(arg); + MacroInvocation::Arg arg = _parseMacroArg(expansion); Index argIndex = argCount++; if(argIndex < paramCount) @@ -849,10 +1244,7 @@ static Index _parseMacroArgs( // parameters of the macro, so we will associate // it with the parameter name. 
// - NameLoc paramNameAndLoc = macro->params[argIndex]; - Name* paramName = paramNameAndLoc.name; - arg->nameAndLoc = paramNameAndLoc; - expansion->expansionEnvironment.macros[paramName] = arg; + expansion->m_args.add(arg); } else { @@ -864,13 +1256,12 @@ static Index _parseMacroArgs( // need to be disposed of, so that we don't // leak. // - delete arg; } // After consuming one macro argument, we look at // the next token to decide what to do. // - switch( PeekRawTokenType(preprocessor)) + switch(m_inputStreams.peekTokenType()) { case TokenType::RParent: case TokenType::EndOfFile: @@ -884,7 +1275,7 @@ static Index _parseMacroArgs( // continue scanning for more macro // arguments. // - AdvanceRawToken(preprocessor); + readRawToken(); break; default: @@ -896,280 +1287,763 @@ static Index _parseMacroArgs( // ahead for a closing `)`. For now it is simplest // to just bail. // - GetSink(preprocessor)->diagnose(PeekLoc(preprocessor), Diagnostics::errorParsingToMacroInvocationArgument, paramCount, macro->getName()); + getSink()->diagnose(m_inputStreams.peekLoc(), Diagnostics::errorParsingToMacroInvocationArgument, paramCount, macro->getName()); return argCount; } } } - // Check whether the current token on the given input stream should be // treated as a macro invocation, and if so set up state for expanding // that macro. -static void MaybeBeginMacroExpansion( - Preprocessor* preprocessor ) +void ExpansionInputStream::_maybeBeginMacroInvocation() { + auto preprocessor = m_preprocessor; + // We iterate because the first token in the expansion of one // macro may be another macro invocation. for (;;) { - // Look at the next token ahead of us - Token token = PeekRawToken(preprocessor); + // The "next" token to be read is already in our `m_lookeadToken` + // member, so we can simply inspect it. + // + // We also care about where that token came from (which input stream). + // + Token token = m_lookaheadToken; - // Not an identifier? Can't be a macro. 
+ // If the token is not an identifier, then it can't possibly name a macro. + // if (token.type != TokenType::Identifier) + { return; + } - // Look for a macro with the given name. + // We will look for a defined macro matching the name. + // + // If there isn't one this couldn't possibly be the start of a macro + // invocation. + // Name* name = token.getName(); - PreprocessorMacro* macro = LookupMacro(preprocessor, name); - - // Not a macro? Can't be an invocation. + MacroDefinition* macro = LookupMacro(&preprocessor->globalEnv, name); if (!macro) { return; } - // If the macro is busy (already being expanded), - // don't try to trigger recursive expansion - if (_isMacroBusy(macro, GetCurrentEnvironment(preprocessor))) - return; + // Now we get to the slightly trickier cases. + // + // *If* the identifier names a macro, but we are currently in the + // process of expanding the same macro (possibly via multiple + // nested expansions) then we don't want to expand it again. + // + // We determine which macros are currently being expanded + // by looking at the input stream assocaited with that one + // token of lookahead. + // + // Note: it is critical here that `m_inputStreams.getTopStream()` + // returns the top-most stream that was active when `m_lookaheadToken` + // was consumed. This means that an `InputStreamStack` cannot + // "pop" an input stream that it at its end until after something + // tries to read an additional token. + // + auto activeStream = m_inputStreams.getTopStream(); + + // Each input stream keeps track of a linked list of the `MacroInvocation`s + // that are considered "busy" while reading from that stream. 
+ // + auto busyMacros = activeStream->getFirstBusyMacroInvocation(); - // We might already have looked at this token, - // and need to suppress expansion - if (token.flags & TokenFlag::SuppressMacroExpansion) + // If the macro is busy (already being expanded), we don't try to expand + // it again, becaues that would trigger recursive/infinite expansion. + // + if( MacroInvocation::isBusy(macro, busyMacros) ) return; - // A function-style macro invocation should only match - // if the token *after* the identifier is `(`. This - // requires more lookahead than we usually have/need + // At this point we know that the lookahead token names a macro + // definition that is not busy. it is *very* likely that we are + // going to be expanding a macro. + // + // If we aren't already expanding a macro (meaning that the + // current stream tokens are being read from is the "base" stream + // that expansion is being applied to), then we want to consider + // the location of this invocation as the "initiating" macro + // invocation location for things like `__LINE__` uses inside + // of macro bodies. + // + if(activeStream == m_base) + { + m_initiatingMacroInvocationLoc = token.loc; + } + // The next steps depend on whether or not we are dealing + // with a funciton-like macro. + // switch (macro->flavor) { - case PreprocessorMacroFlavor::FunctionLike: + default: { - // Consume the token that (possibly) triggered macro expansion - AdvanceRawToken(preprocessor); + // Object-like macros (whether builtin or user-defined) are the easy case. + // + // We simply create a new macro invocation based on the macro definition, + // prime its input stream, and then push it onto our stack of active + // macro invocations. 
+ // + MacroInvocation* invocation = new MacroInvocation(preprocessor, macro, busyMacros, token.loc, m_initiatingMacroInvocationLoc); + invocation->prime(); + _pushMacroInvocation(invocation); + } + break; - // Look at the next token, and see if it is an opening `(` - // that indicates we should actually expand a macro. - if(PeekRawTokenType(preprocessor) != TokenType::LParent) + case MacroDefinition::Flavor::FunctionLike: + { + // The function-like macro case is more complicated, primarily because + // of the need to handle arguments. The arguments of a function-like + // macro are expected to be tokens inside of balanced `()` parentheses. + // + // One special-case rule of the C/C++ preprocessor is that if the + // name of a function-like macro is *not* followed by a `(`, then + // it will not be subject to macro expansion. This design choice is + // motivated by wanting to be able to create a macro that handles + // direct calls to some primitive, along with a true function that handles + // cases where it is used in other ways. E.g.: + // + // extern int coolFunction(int x); + // + // #define coolFunction(x) x^0xABCDEF + // + // int x = coolFunction(3); // uses the macro + // int (*functionPtr)(int) f = coolFunction; // uses the function + // + // While we don't expect users to make heavy use of this feature in Slang, + // it is worthwhile to try to stay compatible. + // + // Because the macro name is already in `m_lookaheadToken`, we can peak + // at the underlying input stream to see if the next non-whitespace + // token after the lookahead is a `(`. + // + m_inputStreams.skipAllWhitespace(); + Token maybeLeftParen = m_inputStreams.peekToken(); + if(maybeLeftParen.type != TokenType::LParent) { - // In this case, we are in a bit of a mess, because we have - // consumed the token that named the macro, but we need to - // make sure that token (and not whatever came after it) - // gets returned to the user. 
+ // If we see a token other then `(` then we aren't suppsoed to be + // expanding the macro after all. Luckily, there is no state + // that we have to rewind at this point, because we never committed + // to macro expansion or consumed any (non-whitespace) tokens after + // the lookahead. + // + // We can simply bail out of looking for macro invocations, and the + // next read of a token will consume the lookahead token (the macro + // name) directly. // - // To work around this we will construct a short-lived input - // stream just to handle that one token, and also set - // a flag on the token to keep us from doing this logic again. - - token.flags |= TokenFlag::SuppressMacroExpansion; - - SimpleTokenInputStream* simpleStream = createSimpleInputStream(preprocessor, token); - PushInputStream(preprocessor, simpleStream); return; } - MacroExpansion* expansion = new MacroExpansion(); - initializeMacroExpansion(preprocessor, expansion, macro); + // If we saw an opening `(`, then we know we are starting some kind of + // macro invocation, although we don't yet know if it is well-formed. + // + MacroInvocation* invocation = new MacroInvocation(preprocessor, macro, busyMacros, token.loc, m_initiatingMacroInvocationLoc); - // Consume the opening `(` - Token leftParen = AdvanceRawToken(preprocessor); + // We start by consuming the opening `(` that we checked for above. + // + Token leftParen = m_inputStreams.readToken(); + SLANG_ASSERT(leftParen.type == TokenType::LParent); - // Parse the arguments to the macro invocation - Index argCount = _parseMacroArgs(preprocessor, macro, expansion); + // Next we parse any arguments to the macro invocation, which will + // consist of `()`-balanced sequences of tokens separated by `,`s. 
+ // + Index argCount = _parseMacroArgs(macro, invocation); - // Expect a closing ')' - if(PeekRawTokenType(preprocessor) == TokenType::RParent) + // We expect th arguments to be followed by a `)` to match the opening + // `(`, and if we don't find one we need to diagnose the issue. + // + if(m_inputStreams.peekTokenType() == TokenType::RParent) { - AdvanceRawToken(preprocessor); + m_inputStreams.readToken(); } else { - GetSink(preprocessor)->diagnose(PeekLoc(preprocessor), Diagnostics::expectedTokenInMacroArguments, TokenType::RParent, PeekRawTokenType(preprocessor)); + GetSink(preprocessor)->diagnose(m_inputStreams.peekLoc(), Diagnostics::expectedTokenInMacroArguments, TokenType::RParent, m_inputStreams.peekTokenType()); } - // If we didn't parse the expected number of arguments, - // then diagnose an error and do not attempt expansion. + // The number of arguments at the macro invocation site might not + // match the number of arguments declared for the macro. In this + // case we diagnose an issue *and* skip expansion of this invocation + // (it effectively expands to zero new tokens). // - // TODO: This check will need to be updated for variadic macros. + // TODO: If/when we support variadic macros, this check will need to + // handle cases where there are more arguments than declared parameters. // const Index paramCount = Index(macro->params.getCount()); - if (argCount != paramCount) + if( argCount != paramCount ) { - GetSink(preprocessor)->diagnose(PeekLoc(preprocessor), Diagnostics::wrongNumberOfArgumentsToMacro, paramCount, argCount); + GetSink(preprocessor)->diagnose(leftParen.loc, Diagnostics::wrongNumberOfArgumentsToMacro, paramCount, argCount); return; } // Now that the arguments have been parsed and validated, - // we are ready to proceed with expansion of the macro body. + // we are ready to proceed with expansion of the macro invocation. 
// - pushMacroExpansion(preprocessor, expansion, token.loc); - break; + invocation->prime(); + _pushMacroInvocation(invocation); } - case PreprocessorMacroFlavor::FunctionArg: - case PreprocessorMacroFlavor::ObjectLike: - { - // Consume the token that triggered macro expansion - AdvanceRawToken(preprocessor); + break; + } + } +} - // Object-like macros are the easy case. - MacroExpansion* expansion = new MacroExpansion(); - initializeMacroExpansion(preprocessor, expansion, macro); - pushMacroExpansion(preprocessor, expansion, token.loc); - break; - } - case PreprocessorMacroFlavor::BuiltinLine: - case PreprocessorMacroFlavor::BuiltinFile: +Token MacroInvocation::_readTokenImpl() +{ + // The `MacroInvocation` type maintains an invariant that after each + // call to `_readTokenImpl`: + // + // * The `m_currentOpStreams` stack will be non-empty + // + // * The input state in `m_currentOpStreams` will correspond to the + // macro definition op at index `m_macroOpIndex` + // + // * The next token read from `m_currentOpStreams` will not be an EOF + // *unless* the expansion has reached the end of the macro invocaiton + // + // The first time `_readTokenImpl()` is called, it will only be able + // to rely on the weaker invariant guaranteed by `_initCurrentOpStream()`: + // + // * The `m_currentOpStreams` stack will be non-empty + // + // * The input state in `m_currentOpStreams` will correspond to the + // macro definition op at index `m_macroOpIndex` + // + // * The next token read from `m_currentOpStreams` may be an EOF if + // the current op has an empty expansion. + // + // In either of those cases, we can start by reading the next token + // from the expansion of the current op. + // + Token token = m_currentOpStreams.readToken(); + Index tokenOpIndex = m_macroOpIndex; + + // Once we've read that `token`, we need to work to establish or + // re-establish our invariant, which we do by looping until we are + // in a valid state. 
+ // + for(;;) + { + // At the start of the loop, we already have the weaker invariant + // guaranteed by `_initCurrentOpStream()`: the current op stream + // is in a consistent state, but it *might* be at its end. + // + // If the current stream is *not* at its end, then we seem to + // have the stronger invariant as well, and we can return. + // + if(m_currentOpStreams.peekTokenType()!= TokenType::EndOfFile) + { + // We know that we have tokens remaining to read from + // `m_currentOpStreams`, and we thus expect that the + // `token` we just read must also be a non-EOF token. + // + // Note: This case is subtle, because this might be the first invocation + // of `_readTokenImpl()` after the `_initCurrentOpStream()` call + // as part of `prime()`. It seems that if the first macro op had + // an empty expansion, then `token` might be the EOF for that op. + // + // That detail is handled below in the logic for switching to a new + // macro op. + // + SLANG_ASSERT(token.type != TokenType::EndOfFile); + + // We can safely return with our invaraints intact, because + // the next attempt to read a token will read a non-EOF. + // + return token; + } + + // Otherwise, we have reached the end of the tokens coresponding + // to the current op, and we want to try to advance to the next op + // in the macro definition. + // + Index currentOpIndex = m_macroOpIndex; + Index nextOpIndex = currentOpIndex+1; + + // However, if we are already working on the last op in the macro + // definition, then the next op index is out of range and we don't + // want to advance. Instead we will keep the state of the macro + // invocation where it is: at the end of the last op, returning + // EOF tokens forever. + // + // Note that in this case we do not care whether `token` is an EOF + // or not, because we expect the last op to yield an EOF at the + // end of the macro expansion. 
+ // + if(nextOpIndex == m_macro->ops.getCount()) + return token; + + // Because `m_currentOpStreams` is at its end, we can pop all of + // those streams to reclaim their memory before we push any new + // ones. + // + m_currentOpStreams.popAll(); + + // Now we've commited to moving to the next op in the macro + // definition, and we want to push appropriate streams onto + // the stack of input streams to represent that op. + // + m_macroOpIndex = nextOpIndex; + auto const& nextOp = m_macro->ops[nextOpIndex]; + + // What we do depends on what the next op's opcode is. + // + switch (nextOp.opcode) + { + default: { - const SourceLoc loc = _isInMacroExpansion(preprocessor) ? preprocessor->initiatingMacroSourceLoc : token.loc; + // All of the easy cases are handled by `_initCurrentOpStream()` + // which also gets invoked in the logic of `MacroInvocation::prime()` + // to handle the first op in the definition. + // + // This operation will set up `m_currentOpStreams` so that it + // accurately reflects the expansion of the op at index `m_macroOpIndex`. + // + // What it will *not* do is guarantee that the expansion for that + // op is non-empty. We will thus continue the outer `for` loop which + // checks whether the current op (which we just initialized here) is + // already at its end. + // + _initCurrentOpStream(); - if (!loc.isValid()) + // Before we go back to the top of the loop, we need to deal with the + // important corner case where `token` might have been an EOF because + // the very first op in a macro body had an empty expansion, e.g.: + // + // #define TWELVE(X) X 12 X + // TWELVE() + // + // In this case, the first `X` in the body of the macro will expand + // to nothing, so once that op is set up by `_initCurrentOpStrem()` + // the `token` we read here will be an EOF. + // + // The solution is to detect when all preceding ops considered by + // this loop have been EOFs, and setting the value to the first + // non-EOF token read. 
+ // + if(token.type == TokenType::EndOfFile) { - // If we don't have a valid source location, don't expand - return; + token = m_currentOpStreams.readToken(); + tokenOpIndex = m_macroOpIndex; } + } + break; - AdvanceRawToken(preprocessor); - - SourceManager* sourceManager = preprocessor->getSourceManager(); - - // Since the location can be overridden by #line directives, use the slower path to get the line number - const HumaneSourceLoc humaneSourceLoc = sourceManager->getHumaneLoc(loc); + case MacroDefinition::Opcode::TokenPaste: + { + // The more complicated case is a token paste (`##`). + // + Index tokenPasteTokenIndex = nextOp.index0; + SourceLoc tokenPasteLoc = m_macro->tokens.m_tokens[tokenPasteTokenIndex].loc; - Token newToken; + // A `##` must always appear between two macro ops (whether literal tokens + // or macro parameters) and it is supposed to paste together the last + // token from the left op with the first token from the right op. + // + // We will accumulate the pasted token as a string and then re-lex it. + // + StringBuilder pastedContent; - StringBuilder buf; - if (macro->flavor == PreprocessorMacroFlavor::BuiltinLine) + // Note that this is *not* the same as saying that we paste together the + // last token the preceded the `##` with the first token that follows it. + // In particular, if you have `L ## R` and either `L` or `R` has an empty + // expansion, then the `##` should treat that operand as empty. + // + // As such, there's a few cases to consider here. + // + // If the `tokenOpIndex` that `token` was read from is the op right + // before the `##`, then we know it is the last token produced by + // the preceding op (or possibly an EOF if that ops expansion was empty). 
+ // + if(tokenOpIndex == nextOpIndex-1) { - newToken.type = TokenType::IntegerLiteral; - buf << humaneSourceLoc.line; + if(token.type != TokenType::EndOfFile) + { + pastedContent << token.getContent(); + } } else { - // We need to escape to a string - newToken.type = TokenType::StringLiteral; - - auto escapeHandler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); - StringEscapeUtil::appendQuoted(escapeHandler, humaneSourceLoc.pathInfo.foundPath.getUnownedSlice(), buf); + // Otherwise, the op that preceded the `##` was *not* the same op + // that produced `token`, which could only happen if that preceding + // op was one that was initialized by this loop and then found to + // have an empty expansion. As such, we don't need to add anything + // onto `pastedContent` in this case. } - // We are going to keep the actual text in the slice pool, so it stays in scope - // and if the value appears multiple times, it will shared - auto& pool = sourceManager->getStringSlicePool(); + // Once we've dealt with the token to the left of the `##` (if any) + // we can turn our attention to the token to the right. + // + // This token will be the first token (if any) to be produced by whatever + // op follows the `##`. We will thus start by initialiing the `m_currentOpStrems` + // for reading from that op. + // + m_macroOpIndex++; + _initCurrentOpStream(); + + // If the right operand yields at least one non-EOF token, then we need + // to append that content to our paste result. + // + Token rightToken = m_currentOpStreams.readToken(); + if(rightToken.type != TokenType::EndOfFile) + pastedContent << rightToken.getContent(); + + // Now we need to re-lex the token(s) that resulted from pasting, which requires + // us to create a fresh source file to represent the paste result. 
+ // + PathInfo pathInfo = PathInfo::makeTokenPaste(); + SourceManager* sourceManager = m_preprocessor->getSourceManager(); + SourceFile* sourceFile = sourceManager->createSourceFileWithString(pathInfo, pastedContent.ProduceString()); + SourceView* sourceView = sourceManager->createSourceView(sourceFile, nullptr, tokenPasteLoc); - auto poolHandle = pool.add(buf.getUnownedSlice()); + Lexer lexer; + lexer.initialize(sourceView, GetSink(m_preprocessor), m_preprocessor->getNamePool(), sourceManager->getMemoryArena()); + auto lexedTokens = lexer.lexAllSemanticTokens(); - auto slice = pool.getSlice(poolHandle); + // The `lexedTokens` will always contain at least one token, representing an EOF for + // the end of the lexed token sequence. + // + // Because we have concatenated together the content of zero, one, or two different + // tokens, there are many cases for what the result could be: + // + // * The content could lex as zero tokens, followed by an EOF. This would happen if + // both the left and right operands to `##` were empty. + // + // * The content could lex to one token, followed by an EOF. This could happen if + // one operand was empty but not the other, or if the left and right tokens concatenated + // to form a single valid token. + // + // * The content could lex to more than one token, for cases like `+` pasted with `-`, + // where the result is not a valid single token. + // + // The first two cases are both considered valid token pastes, while the latter should + // be diagnosed as a warning, even if it is clear how we can handle it. + // + if (lexedTokens.m_tokens.getCount() > 2) + { + getSink()->diagnose(tokenPasteLoc, Diagnostics::invalidTokenPasteResult, pastedContent); + } - newToken.setContent(slice); + // No matter what sequence of tokens we got, we can create an input stream to represent + // them and push it as the representation of the `##` macro definition op. 
+ // + // Note: the stream(s) created for the right operand will be on the stack under the new + // one we push for the pasted tokens, and as such the input state is capable of reading + // from both the input stream for the `##` through to the input for the right-hand-side + // op, which is consistent with `m_macroOpIndex`. + // + SingleUseInputStream* inputStream = new SingleUseInputStream(m_preprocessor, lexedTokens); + m_currentOpStreams.push(inputStream); - // We set the location to be the same as where the original location was - newToken.loc = token.loc; + // There's one final detail to cover before we move on. *If* we used `token` as part + // of the content of the token paste, *or* if `token` is an EOF, then we need to + // replace it with the first token read from the expansion. + // + // (Otherwise, the `##` is being initialized as part of advancing through ops with + // empty expansion to the right of the op for a non-EOF `token`) + // + if((tokenOpIndex == nextOpIndex-1) || token.type == TokenType::EndOfFile) + { + // Note that `tokenOpIndex` is being set here to the op index for the + // right-hand operand to the `##`. This is appropriate for cases where + // you might have chained `##` ops: + // + // #define F(X,Y,Z) X ## Y ## Z + // + // If `Y` expands to a single token, then `X ## Y` should be treated + // as the left operand to the `Y ## Z` paste. + // + token = m_currentOpStreams.readToken(); + tokenOpIndex = m_macroOpIndex; + } - // Add to the start of the stream - SimpleTokenInputStream* simpleStream = createSimpleInputStream(preprocessor, newToken); - PushInputStream(preprocessor, simpleStream); - break; + // At this point we are ready to head back to the top of the loop and see + // if our invariants have been re-established. } + break; } } } -// Read one token with macro-expansion enabled. 
-static Token AdvanceToken(Preprocessor* preprocessor) +void MacroInvocation::_pushSingleTokenStream(TokenType tokenType, SourceLoc tokenLoc, UnownedStringSlice const& content) { -top: - // Check whether we need to macro expand at the cursor. - MaybeBeginMacroExpansion(preprocessor); + // The goal here is to push a token stream that represents a single token + // with exactly the given `content`, etc. + // + // We are going to keep the content alive using the slice pool for the source + // manager, which will also lead to it being shared if used multiple times. + // + SourceManager* sourceManager = m_preprocessor->getSourceManager(); + auto& pool = sourceManager->getStringSlicePool(); + auto poolHandle = pool.add(content); + auto slice = pool.getSlice(poolHandle); + + Token token; + token.type = tokenType; + token.setContent(slice); + token.loc = tokenLoc; - // Read a raw token (now that expansion has been triggered) - Token token = AdvanceRawToken(preprocessor); + TokenList lexedTokens; + lexedTokens.add(token); - // Check if we need to perform token pasting - if (PeekRawTokenType(preprocessor) != TokenType::PoundPound) + // Every token list needs to be terminated with an EOF, + // so we will construct one that matches the location + // for the `token`. + // + Token eofToken; + eofToken.type = TokenType::EndOfFile; + eofToken.loc = token.loc; + eofToken.flags = TokenFlag::AfterWhitespace | TokenFlag::AtStartOfLine; + lexedTokens.add(eofToken); + + SingleUseInputStream* inputStream = new SingleUseInputStream(m_preprocessor, lexedTokens); + m_currentOpStreams.push(inputStream); +} + +template<typename F> +void MacroInvocation::_pushStreamForSourceLocBuiltin(TokenType tokenType, F const& valueBuilder) +{ + // The `__LINE__` and `__FILE__` macros will always expand based on + // the "initiating" source location, which should come from the + // top-level file instead of any nested macros being expanded. 
+ // + const SourceLoc initiatingLoc = m_initiatingMacroInvocationLoc; + if( !initiatingLoc.isValid() ) { - // If we aren't token pasting, then we are done - return token; + // If we cannot find a valid source location for the initiating + // location, then we will not expand the macro. + // + // TODO: Maybe we should issue a diagnostic here? + // + return; } - else + + SourceManager* sourceManager = m_preprocessor->getSourceManager(); + HumaneSourceLoc humaneInitiatingLoc = sourceManager->getHumaneLoc(initiatingLoc); + + // The `valueBuilder` provided by the caller will determine what the content + // of the token will be based on the source location (either to generate the + // `__LINE__` or the `__FILE__` value). + // + StringBuilder content; + valueBuilder(content, humaneInitiatingLoc); + + // Next we construct and push an input stream with exactly the token type and content we want. + // + _pushSingleTokenStream(tokenType, m_macroInvocationLoc, content.getUnownedSlice()); +} + +void MacroInvocation::_initCurrentOpStream() +{ + // The job of this function is to make sure that `m_currentOpStreams` is set up + // to reflect the state of the op at `m_macroOpIndex`. + // + Index opIndex = m_macroOpIndex; + auto& op = m_macro->ops[opIndex]; + + // As one might expect, the setup logic to apply depends on the opcode for the op. + // + switch(op.opcode) { - // We are pasting tokens, which could get messy + default: + SLANG_UNEXPECTED("unhandled macro opcode case"); + break; + + case MacroDefinition::Opcode::RawSpan: + { + // A raw span of tokens (no use of macro parameters, etc.) is easy enough + // to handle. The operands of the op give us the begin/end index of the + // tokens in the macro definition that we'd like to use. 
+ // + Index beginTokenIndex = op.index0; + Index endTokenIndex = op.index1; - StringBuilder sb; - sb << token.getContent(); + // Because the macro definition stores its definition tokens directly, we + // can simply construct a token reader for reading from the tokens in + // the chosen range, and push a matching input stream. + // + auto tokenBuffer = m_macro->tokens.begin(); + auto tokenReader = TokenReader(tokenBuffer + beginTokenIndex, tokenBuffer + endTokenIndex); + PretokenizedInputStream* stream = new PretokenizedInputStream(m_preprocessor, tokenReader); + m_currentOpStreams.push(stream); + } + break; + + case MacroDefinition::Opcode::UnexpandedParam: + { + // When a macro parameter is referenced as an operand of a token paste (`##`) + // it is not subjected to macro expansion. + // + // In this case, the zero-based index of the macro parameter was stored in + // the `index1` operand to the macro op. + // + Index paramIndex = op.index1; + SLANG_ASSERT(paramIndex >= 0); + SLANG_ASSERT(paramIndex < m_macro->params.getCount()); + SLANG_ASSERT(paramIndex < m_args.getCount()); + + // We can look up the corresponding argument to the macro invocation, + // which stores a begin/end pair of indices into the raw token stream + // that makes up the macro arguments. + // + auto arg = m_args[paramIndex]; + auto argTokens = m_argTokens.getBuffer(); + auto tokenReader = TokenReader(argTokens + arg.beginTokenIndex, argTokens + arg.endTokenIndex); - Token poundPoundToken; + // Because expansion doesn't apply to this parameter reference, we can simply + // play back those tokens exactly as they appeared in the argument list. 
+ // + PretokenizedInputStream* stream = new PretokenizedInputStream(m_preprocessor, tokenReader); + m_currentOpStreams.push(stream); + } + break; - while (PeekRawTokenType(preprocessor) == TokenType::PoundPound) + case MacroDefinition::Opcode::ExpandedParam: { - // Consume the `##` - poundPoundToken = AdvanceRawToken(preprocessor); + // Most uses of a macro parameter will be subject to macro expansion. + // + // The initial logic here is similar to the unexpanded case above. + // + Index paramIndex = op.index1; + SLANG_ASSERT(paramIndex >= 0); + SLANG_ASSERT(paramIndex < m_macro->params.getCount()); + SLANG_ASSERT(paramIndex < m_args.getCount()); - // Possibly macro-expand the next token - MaybeBeginMacroExpansion(preprocessor); + auto arg = m_args[paramIndex]; + auto argTokens = m_argTokens.getBuffer(); + auto tokenReader = TokenReader(argTokens + arg.beginTokenIndex, argTokens + arg.endTokenIndex); - // Read the next raw token (now that expansion has been triggered) - Token nextToken = AdvanceRawToken(preprocessor); + PretokenizedInputStream* stream = new PretokenizedInputStream(m_preprocessor, tokenReader); - sb << nextToken.getContent(); + // The only interesting addition to the unexpanded case is that we wrap + // the stream that "plays back" the argument tokens with a stream that + // applies macro expansion to them. + // + ExpansionInputStream* expansion = new ExpansionInputStream(m_preprocessor, stream); + m_currentOpStreams.push(expansion); } + break; - // Now re-lex the input + case MacroDefinition::Opcode::StringizedParam: + { + // A macro parameter can also be "stringized" in which case the (unexpanded) + // argument tokens will be concatenated and escaped to form the content of + // a string literal. + // + // Much of the initial logic is shared with the other parameter cases above. 
+ // + Index tokenIndex = op.index0; + auto loc = m_macro->tokens.m_tokens[tokenIndex].loc; + + Index paramIndex = op.index1; + SLANG_ASSERT(paramIndex >= 0); + SLANG_ASSERT(paramIndex < m_macro->params.getCount()); + SLANG_ASSERT(paramIndex < m_args.getCount()); - SourceManager* sourceManager = preprocessor->getSourceManager(); + auto arg = m_args[paramIndex]; + auto argTokens = m_argTokens.getBuffer(); - // We create a dummy file to represent the token-paste operation - PathInfo pathInfo = PathInfo::makeTokenPaste(); - - SourceFile* sourceFile = sourceManager->createSourceFileWithString(pathInfo, sb.ProduceString()); - SourceView* sourceView = sourceManager->createSourceView(sourceFile, nullptr, poundPoundToken.getLoc()); + // We will now iterate over the argument tokens that were passed for + // this parameter, and use them to build a string. + // + auto beginToken = argTokens + arg.beginTokenIndex; + auto endToken = argTokens + arg.endTokenIndex; + + // A stringized parameter is always a `"`-enclosed string literal + // (there is no way to stringize things to form a character literal). + // + StringBuilder builder; + builder.appendChar('"'); + for(auto tokenCursor = beginToken; tokenCursor != endToken; tokenCursor++) + { + auto token = *tokenCursor; - Lexer lexer; - lexer.initialize(sourceView, GetSink(preprocessor), preprocessor->getNamePool(), sourceManager->getMemoryArena()); + // Any whitespace between the tokens of argument must be collapsed into + // a single space character. Fortunately for us, the lexer has tracked + // for each token whether it was immediately preceded by whitespace, + // so we can check for whitespace that precedes any token except the first. 
+ // + if(tokenCursor != beginToken && (token.flags & TokenFlag::AfterWhitespace)) + { + builder.appendChar(' '); + } - SimpleTokenInputStream* inputStream = new SimpleTokenInputStream(); - initializeInputStream(preprocessor, inputStream); + // We need to remember to apply escaping to the content of any tokens + // being pulled into the string. E.g., this would come up if we end up + // trying to stringize a literal like `"this"` because we need the resulting + // token to be `"\"this\""` which includes the quote characters in the string + // literal value. + // + auto handler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); + handler->appendEscaped(token.getContent(), builder); + } + builder.appendChar('"'); - inputStream->lexedTokens = lexer.lexAllTokens(); - inputStream->tokenReader = TokenReader(inputStream->lexedTokens); + // Once we've constructed the content of the stringized result, we need to push + // a new single-token stream that represents that content. + // + _pushSingleTokenStream(TokenType::StringLiteral, loc, builder.getUnownedSlice()); + } + break; - // We expect the reuslt of lexing to be two tokens: one for the actual value, - // and one for the end-of-input marker. - if (inputStream->tokenReader.getCount() != 2) + case MacroDefinition::Opcode::BuiltinLine: { - // We expect a token paste to produce a single token - // TODO(tfoley): emit a diagnostic here + // This is a special opcode used only in the definition of the built-in `__LINE__` macro + // (note that *uses* of `__LINE__` do not map to this opcode; only the definition of + // `__LINE__` itself directly uses it). + // + // Most of the logic for generating a token from the current source location is wrapped up + // in a helper routine so that we don't need to duplicate it between this and the `__FILE__` + // case below. + // + // The only key details here are that we specify the type of the token (`IntegerLiteral`) + // and its content (the value of `loc.line`). 
+ // + _pushStreamForSourceLocBuiltin(TokenType::IntegerLiteral, [=](StringBuilder& builder, HumaneSourceLoc const& loc) + { + builder << loc.line; + }); } + break; - PushInputStream(preprocessor, inputStream); - goto top; - } -} - -// Read one token with macro-expansion enabled. -// -// Note that because triggering macro expansion may -// involve changing the input-stream state, this -// operation *can* have side effects. -static Token PeekToken(Preprocessor* preprocessor) -{ - // Check whether we need to macro expand at the cursor. - MaybeBeginMacroExpansion(preprocessor); - - // Peek a raw token (now that expansion has been triggered) - return PeekRawToken(preprocessor); + case MacroDefinition::Opcode::BuiltinFile: + { + // The `__FILE__` case is quite similar to `__LINE__`, except for the type of token it yields, + // and the way it computes the desired token content. + // + _pushStreamForSourceLocBuiltin(TokenType::StringLiteral, [=](StringBuilder& builder, HumaneSourceLoc const& loc) + { - // TODO: need a plan for how to handle token pasting - // here without it being onerous. Would be nice if we - // didn't have to re-do pasting on a "peek"... -} + auto escapeHandler = StringEscapeUtil::getHandler(StringEscapeUtil::Style::Cpp); + StringEscapeUtil::appendQuoted(escapeHandler, loc.pathInfo.foundPath.getUnownedSlice(), builder); + }); + } + break; -// Peek the type of the next token, including macro expansion. -static TokenType PeekTokenType(Preprocessor* preprocessor) -{ - return PeekToken(preprocessor).type; + case MacroDefinition::Opcode::TokenPaste: + // Note: If we ever end up in this case for `Opcode::TokenPaste`, then it implies + // something went very wrong. + // + // A `##` op should not be allowed to appear as the first (or last) token in + // a macro body, and consecutive `##`s should be treated as a single `##`. 
+ // + // When `_initCurrentOpStream()` gets called it is either: + // + // * called on the first op in the body of a macro (can't be a token paste) + // + // * called on the first op *after* a `##` (can't be another `##`) + // + // * explicitly tests for an handles token pastes spearately + // + // If we end up hitting the error here, then `_initCurrentOpStream()` is getting + // called in an inappropriate case. + // + SLANG_UNEXPECTED("token paste op in macro expansion"); + break; + } } // @@ -1185,81 +2059,108 @@ static TokenType PeekTokenType(Preprocessor* preprocessor) struct PreprocessorDirectiveContext { // The preprocessor that is parsing the directive. - Preprocessor* preprocessor; + Preprocessor* m_preprocessor; // The directive token (e.g., the `if` in `#if`). // Useful for reference in diagnostic messages. - Token directiveToken; + Token m_directiveToken; // Has any kind of parse error been encountered in // the directive so far? - bool parseError; + bool m_parseError; // Have we done the necessary checks at the end // of the directive already? - bool haveDoneEndOfDirectiveChecks; + bool m_haveDoneEndOfDirectiveChecks; + + /// The input file that the directive appeared in + /// + InputFile* m_inputFile; }; // Get the token for the preprocessor directive being parsed. inline Token const& GetDirective(PreprocessorDirectiveContext* context) { - return context->directiveToken; + return context->m_directiveToken; } // Get the name of the directive being parsed. inline UnownedStringSlice GetDirectiveName(PreprocessorDirectiveContext* context) { - return context->directiveToken.getContent(); + return context->m_directiveToken.getContent(); } // Get the location of the directive being parsed. inline SourceLoc const& GetDirectiveLoc(PreprocessorDirectiveContext* context) { - return context->directiveToken.loc; + return context->m_directiveToken.loc; } // Wrapper to get the diagnostic sink in the context of a directive. 
static inline DiagnosticSink* GetSink(PreprocessorDirectiveContext* context) { - return GetSink(context->preprocessor); + return GetSink(context->m_preprocessor); +} + +static InputFile* getInputFile(PreprocessorDirectiveContext* context) +{ + return context->m_inputFile; +} + +static ExpansionInputStream* getInputStream(PreprocessorDirectiveContext* context) +{ + return context->m_inputFile->getExpansionStream(); } // Wrapper to get a "current" location when parsing a directive static SourceLoc PeekLoc(PreprocessorDirectiveContext* context) { - return PeekLoc(context->preprocessor); + auto inputStream = getInputStream(context); + return inputStream->peekLoc(); } // Wrapper to look up a macro in the context of a directive. -static PreprocessorMacro* LookupMacro(PreprocessorDirectiveContext* context, Name* name) +static MacroDefinition* LookupMacro(PreprocessorDirectiveContext* context, Name* name) { - return LookupMacro(context->preprocessor, name); + auto preprocessor = context->m_preprocessor; + return LookupMacro(&preprocessor->globalEnv, name); } // Determine if we have read everything on the directive's line. static bool IsEndOfLine(PreprocessorDirectiveContext* context) { - return PeekRawToken(context->preprocessor).type == TokenType::EndOfDirective; + auto inputStream = getInputStream(context); + switch(inputStream->peekRawTokenType()) + { + case TokenType::EndOfFile: + case TokenType::NewLine: + return true; + + default: + return false; + } } + // Peek one raw token in a directive, without going past the end of the line. static Token PeekRawToken(PreprocessorDirectiveContext* context) { - return PeekRawToken(context->preprocessor); + auto inputStream = getInputStream(context); + return inputStream->peekRawToken(); } // Read one raw token in a directive, without going past the end of the line. 
-static Token AdvanceRawToken(PreprocessorDirectiveContext* context, LexerFlags lexerFlags = 0) +static Token AdvanceRawToken(PreprocessorDirectiveContext* context) { - if (IsEndOfLine(context)) - return PeekRawToken(context); - return AdvanceRawToken(context->preprocessor, lexerFlags); + auto inputStream = getInputStream(context); + return inputStream->readRawToken(); } // Peek next raw token type, without going past the end of the line. static TokenType PeekRawTokenType(PreprocessorDirectiveContext* context) { - return PeekRawTokenType(context->preprocessor); + auto inputStream = getInputStream(context); + return inputStream->peekRawTokenType(); } // Read one token, with macro-expansion, without going past the end of the line. @@ -1267,23 +2168,21 @@ static Token AdvanceToken(PreprocessorDirectiveContext* context) { if (IsEndOfLine(context)) return PeekRawToken(context); - return AdvanceToken(context->preprocessor); + return getInputStream(context)->readToken(); } // Peek one token, with macro-expansion, without going past the end of the line. static Token PeekToken(PreprocessorDirectiveContext* context) { - if (IsEndOfLine(context)) - return context->preprocessor->endOfFileToken; - return PeekToken(context->preprocessor); + auto inputStream = getInputStream(context); + return inputStream->peekToken(); } // Peek next token type, with macro-expansion, without going past the end of the line. 
static TokenType PeekTokenType(PreprocessorDirectiveContext* context) { - if (IsEndOfLine(context)) - return TokenType::EndOfDirective; - return PeekTokenType(context->preprocessor); + auto inputStream = getInputStream(context); + return inputStream->peekTokenType(); } // Skip to the end of the line (useful for recovering from errors in a directive) @@ -1300,11 +2199,11 @@ static bool ExpectRaw(PreprocessorDirectiveContext* context, TokenType tokenType if (PeekRawTokenType(context) != tokenType) { // Only report the first parse error within a directive - if (!context->parseError) + if (!context->m_parseError) { GetSink(context)->diagnose(PeekLoc(context), diagnostic, tokenType, GetDirectiveName(context)); } - context->parseError = true; + context->m_parseError = true; return false; } Token const& token = AdvanceRawToken(context); @@ -1318,10 +2217,10 @@ static bool Expect(PreprocessorDirectiveContext* context, TokenType tokenType, D if (PeekTokenType(context) != tokenType) { // Only report the first parse error within a directive - if (!context->parseError) + if (!context->m_parseError) { GetSink(context)->diagnose(PeekLoc(context), diagnostic, tokenType, GetDirectiveName(context)); - context->parseError = true; + context->m_parseError = true; } return false; } @@ -1337,89 +2236,82 @@ static bool Expect(PreprocessorDirectiveContext* context, TokenType tokenType, D // Preprocessor Conditionals // -// Determine whether the current preprocessor state means we -// should be skipping tokens. 
-static bool IsSkipping(Preprocessor* preprocessor) +bool InputFile::isSkipping() { - PreprocessorInputStream* inputStream = preprocessor->inputStream; - if (!inputStream) return false; - - PrimaryInputStream* primaryStream = inputStream->primaryStream; - if(!primaryStream) return false; - // If we are not inside a preprocessor conditional, then don't skip - PreprocessorConditional* conditional = primaryStream->conditional; + Conditional* conditional = m_conditional; if (!conditional) return false; // skip tokens unless the conditional is inside its `true` case - return conditional->state != PreprocessorConditionalState::During; + return conditional->state != Conditional::State::During; } // Wrapper for use inside directives -static inline bool IsSkipping(PreprocessorDirectiveContext* context) +static inline bool isSkipping(PreprocessorDirectiveContext* context) { - return IsSkipping(context->preprocessor); + return getInputFile(context)->isSkipping(); } // Create a preprocessor conditional -static PreprocessorConditional* CreateConditional(Preprocessor* /*preprocessor*/) +static Conditional* CreateConditional(Preprocessor* /*preprocessor*/) { // TODO(tfoley): allocate these more intelligently (for example, // pool them on the `Preprocessor`. - return new PreprocessorConditional(); + return new Conditional(); } -// Destroy a preprocessor conditional. -static void DestroyConditional(PreprocessorConditional* conditional) +static void _setLexerDiagnosticSuppression( + InputFile* inputFile, + bool shouldSuppressDiagnostics) { - delete conditional; + if(shouldSuppressDiagnostics) + { + inputFile->getLexer()->m_lexerFlags |= kLexerFlag_SuppressDiagnostics; + } + else + { + inputFile->getLexer()->m_lexerFlags &= ~kLexerFlag_SuppressDiagnostics; + } +} + + +static void updateLexerFlagsForConditionals( + InputFile* inputFile) +{ + _setLexerDiagnosticSuppression(inputFile, inputFile->isSkipping()); } -// Start a preprocessor conditional, with an initial enable/disable state. 
+ /// Start a preprocessor conditional, with an initial enable/disable state. static void beginConditional( PreprocessorDirectiveContext* context, - PreprocessorInputStream* inputStream, bool enable) { - Preprocessor* preprocessor = context->preprocessor; - SLANG_ASSERT(inputStream); + Preprocessor* preprocessor = context->m_preprocessor; + InputFile* inputFile = getInputFile(context); - PreprocessorConditional* conditional = CreateConditional(preprocessor); + Conditional* conditional = CreateConditional(preprocessor); - conditional->ifToken = context->directiveToken; + conditional->ifToken = context->m_directiveToken; // Set state of this condition appropriately. // // Default to the "haven't yet seen a `true` branch" state. - PreprocessorConditionalState state = PreprocessorConditionalState::Before; + Conditional::State state = Conditional::State::Before; // // If we are nested inside a `false` branch of another condition, then // we never want to enable, so we act as if we already *saw* the `true` branch. // - if (IsSkipping(preprocessor)) state = PreprocessorConditionalState::After; - // - // Similarly, if we ran into any parse errors when dealing with the - // opening directive, then things are probably screwy and we should just - // skip all the branches. - if (IsSkipping(preprocessor)) state = PreprocessorConditionalState::After; + if (inputFile->isSkipping()) state = Conditional::State::After; // // Otherwise, if our condition was true, then set us to be inside the `true` branch - else if (enable) state = PreprocessorConditionalState::During; + else if (enable) state = Conditional::State::During; conditional->state = state; // Push conditional onto the stack - auto primaryStream = inputStream->primaryStream; - conditional->parent = primaryStream->conditional; - primaryStream->conditional = conditional; -} + inputFile->pushConditional(conditional); -// Start a preprocessor conditional, with an initial enable/disable state. 
-static void beginConditional( - PreprocessorDirectiveContext* context, - bool enable) -{ - beginConditional(context, context->preprocessor->inputStream, enable); + updateLexerFlagsForConditionals(inputFile); } // @@ -1430,13 +2322,15 @@ static void beginConditional( typedef int PreprocessorExpressionValue; // Forward-declaretion -static PreprocessorExpressionValue ParseAndEvaluateExpression(PreprocessorDirectiveContext* context); +static PreprocessorExpressionValue _parseAndEvaluateExpression(PreprocessorDirectiveContext* context); // Parse a unary (prefix) expression inside of a preprocessor directive. static PreprocessorExpressionValue ParseAndEvaluateUnaryExpression(PreprocessorDirectiveContext* context) { - if( PeekTokenType(context) == TokenType::EndOfDirective ) + switch(PeekTokenType(context)) { + case TokenType::EndOfFile: + case TokenType::NewLine: GetSink(context)->diagnose(PeekLoc(context), Diagnostics::syntaxErrorInPreprocessorExpression); return 0; } @@ -1456,7 +2350,7 @@ static PreprocessorExpressionValue ParseAndEvaluateUnaryExpression(PreprocessorD case TokenType::LParent: { Token leftParen = token; - PreprocessorExpressionValue value = ParseAndEvaluateExpression(context); + PreprocessorExpressionValue value = _parseAndEvaluateExpression(context); if (!Expect(context, TokenType::RParent, Diagnostics::expectedTokenInPreprocessorExpression)) { GetSink(context)->diagnose(leftParen.loc, Diagnostics::seeOpeningToken, leftParen); @@ -1578,7 +2472,7 @@ static PreprocessorExpressionValue EvaluateInfixOp( { if (right == 0) { - if (!context->parseError) + if (!context->m_parseError) { GetSink(context)->diagnose(opToken.loc, Diagnostics::divideByZeroInPreprocessorExpression); } @@ -1590,7 +2484,7 @@ static PreprocessorExpressionValue EvaluateInfixOp( { if (right == 0) { - if (!context->parseError) + if (!context->m_parseError) { GetSink(context)->diagnose(opToken.loc, Diagnostics::divideByZeroInPreprocessorExpression); } @@ -1672,8 +2566,8 @@ static 
PreprocessorExpressionValue ParseAndEvaluateInfixExpressionWithPrecedence return left; } -// Parse a complete (infix) preprocessor expression, and return its value -static PreprocessorExpressionValue ParseAndEvaluateExpression(PreprocessorDirectiveContext* context) + /// Parse a complete (infix) preprocessor expression, and return its value +static PreprocessorExpressionValue _parseAndEvaluateExpression(PreprocessorDirectiveContext* context) { // First read in the left-hand side (or the whole expression in the unary case) PreprocessorExpressionValue value = ParseAndEvaluateUnaryExpression(context); @@ -1681,32 +2575,38 @@ static PreprocessorExpressionValue ParseAndEvaluateExpression(PreprocessorDirect // Try to read in trailing infix operators with correct precedence return ParseAndEvaluateInfixExpressionWithPrecedence(context, value, 0); } - -// Handle a `#if` directive -static void HandleIfDirective(PreprocessorDirectiveContext* context) + /// Parse a preprocessor expression, or skip it if we are in a disabled conditional +static PreprocessorExpressionValue _skipOrParseAndEvaluateExpression(PreprocessorDirectiveContext* context) { - // Record current input stream in case preprocessor expression - // changes the input stream to a macro expansion while we - // are parsing. - auto inputStream = context->preprocessor->inputStream; + auto inputStream = getInputFile(context); - // If we are skipping, we can just consume the expression, and assume true - if (IsSkipping(context->preprocessor)) + // If we are skipping, we want to ignore the expression (including + // anything in it that would lead to a failure in parsing). + // + // We can simply treat the expression as `0` in this case, since its + // value won't actually matter. + // + if (inputStream->isSkipping()) { // Consume everything until the end of the line SkipToEndOfLine(context); - // Begin a preprocessor block, assume true based on the expression - // (contents will all be ignored because skipping). 
- beginConditional(context, inputStream, true); + return 0; } - else - { - // Parse a preprocessor expression. - PreprocessorExpressionValue value = ParseAndEvaluateExpression(context); - // Begin a preprocessor block, enabled based on the expression. - beginConditional(context, inputStream, value != 0); - } + // Otherwise, we will need to parse an expression and return + // its evaluated value. + // + return _parseAndEvaluateExpression(context); +} + +// Handle a `#if` directive +static void HandleIfDirective(PreprocessorDirectiveContext* context) +{ + // Read a preprocessor expression (if not skipping), and begin a conditional + // based on the value of that expression. + // + PreprocessorExpressionValue value = _skipOrParseAndEvaluateExpression(context); + beginConditional(context, value != 0); } // Handle a `#ifdef` directive @@ -1738,11 +2638,11 @@ static void HandleIfNDefDirective(PreprocessorDirectiveContext* context) // Handle a `#else` directive static void HandleElseDirective(PreprocessorDirectiveContext* context) { - PreprocessorInputStream* inputStream = context->preprocessor->inputStream; - SLANG_ASSERT(inputStream); + InputFile* inputFile = getInputFile(context); + SLANG_ASSERT(inputFile); // if we aren't inside a conditional, then error - PreprocessorConditional* conditional = inputStream->primaryStream->conditional; + Conditional* conditional = inputFile->getInnerMostConditional(); if (!conditional) { GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::directiveWithoutIf, GetDirectiveName(context)); @@ -1756,21 +2656,23 @@ static void HandleElseDirective(PreprocessorDirectiveContext* context) GetSink(context)->diagnose(conditional->elseToken.loc, Diagnostics::seeDirective); return; } - conditional->elseToken = context->directiveToken; + conditional->elseToken = context->m_directiveToken; switch (conditional->state) { - case PreprocessorConditionalState::Before: - conditional->state = PreprocessorConditionalState::During; + case 
Conditional::State::Before: + conditional->state = Conditional::State::During; break; - case PreprocessorConditionalState::During: - conditional->state = PreprocessorConditionalState::After; + case Conditional::State::During: + conditional->state = Conditional::State::After; break; default: break; } + + updateLexerFlagsForConditionals(inputFile); } // Handle a `#elif` directive @@ -1778,25 +2680,27 @@ static void HandleElifDirective(PreprocessorDirectiveContext* context) { // Need to grab current input stream *before* we try to parse // the conditional expression. - PreprocessorInputStream* inputStream = context->preprocessor->inputStream; - SLANG_ASSERT(inputStream); + InputFile* inputFile = getInputFile(context); + SLANG_ASSERT(inputFile); // HACK(tfoley): handle an empty `elif` like an `else` directive // // This is the behavior expected by at least one input program. // We will eventually want to be pedantic about this. // even if t - if (PeekRawTokenType(context) == TokenType::EndOfDirective) + switch(PeekRawTokenType(context)) { + case TokenType::EndOfFile: + case TokenType::NewLine: GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::directiveExpectsExpression, GetDirectiveName(context)); HandleElseDirective(context); return; } - PreprocessorExpressionValue value = ParseAndEvaluateExpression(context); + PreprocessorExpressionValue value = _parseAndEvaluateExpression(context); // if we aren't inside a conditional, then error - PreprocessorConditional* conditional = inputStream->primaryStream->conditional; + Conditional* conditional = inputFile->getInnerMostConditional(); if (!conditional) { GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::directiveWithoutIf, GetDirectiveName(context)); @@ -1813,36 +2717,39 @@ static void HandleElifDirective(PreprocessorDirectiveContext* context) switch (conditional->state) { - case PreprocessorConditionalState::Before: + case Conditional::State::Before: if(value) - conditional->state = 
PreprocessorConditionalState::During; + conditional->state = Conditional::State::During; break; - case PreprocessorConditionalState::During: - conditional->state = PreprocessorConditionalState::After; + case Conditional::State::During: + conditional->state = Conditional::State::After; break; default: break; } + + updateLexerFlagsForConditionals(inputFile); } // Handle a `#endif` directive static void HandleEndIfDirective(PreprocessorDirectiveContext* context) { - PreprocessorInputStream* inputStream = context->preprocessor->inputStream; - SLANG_ASSERT(inputStream); + InputFile* inputFile = getInputFile(context); + SLANG_ASSERT(inputFile); // if we aren't inside a conditional, then error - PreprocessorConditional* conditional = inputStream->primaryStream->conditional; + Conditional* conditional = inputFile->getInnerMostConditional(); if (!conditional) { GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::directiveWithoutIf, GetDirectiveName(context)); return; } - inputStream->primaryStream->conditional = conditional->parent; - DestroyConditional(conditional); + inputFile->popConditional(); + + updateLexerFlagsForConditionals(inputFile); } // Helper routine to check that we find the end of a directive where @@ -1854,24 +2761,24 @@ static void HandleEndIfDirective(PreprocessorDirectiveContext* context) // when it switches the input stream. static void expectEndOfDirective(PreprocessorDirectiveContext* context) { - if(context->haveDoneEndOfDirectiveChecks) + if(context->m_haveDoneEndOfDirectiveChecks) return; - context->haveDoneEndOfDirectiveChecks = true; + context->m_haveDoneEndOfDirectiveChecks = true; if (!IsEndOfLine(context)) { // If we already saw a previous parse error, then don't // emit another one for the same directive. 
- if (!context->parseError) + if (!context->m_parseError) { GetSink(context)->diagnose(PeekLoc(context), Diagnostics::unexpectedTokensAfterDirective, GetDirectiveName(context)); } SkipToEndOfLine(context); } - // Clear out the end-of-directive token - AdvanceRawToken(context->preprocessor); + // Clear out the end-of-line token + AdvanceRawToken(context); } /// Read a file in the context of handling a preprocessor directive @@ -1883,14 +2790,14 @@ static SlangResult readFile( // The actual file loading will be handled by the file system // associated with the parent linkage. // - auto fileSystemExt = context->preprocessor->fileSystem; + auto fileSystemExt = context->m_preprocessor->fileSystem; SLANG_RETURN_ON_FAIL(fileSystemExt->loadFile(path.getBuffer(), outBlob)); // If we are running the preprocessor as part of compiling a // specific module, then we must keep track of the file we've // read as yet another file that the module will depend on. // - if( auto handler = context->preprocessor->handler ) + if( auto handler = context->m_preprocessor->handler ) { handler->handleFileDependency(path); } @@ -1898,11 +2805,17 @@ static SlangResult readFile( return SLANG_OK; } +void Preprocessor::pushInputFile(InputFile* inputFile) +{ + inputFile->m_parent = m_currentInputFile; + m_currentInputFile = inputFile; +} + // Handle a `#include` directive static void HandleIncludeDirective(PreprocessorDirectiveContext* context) { - // Consume the directive, and inform the lexer to process the remainder of the line as a file path. 
- AdvanceRawToken(context, kLexerFlag_ExpectFileName); + // Consume the directive + AdvanceRawToken(context); Token pathToken; if(!Expect(context, TokenType::StringLiteral, Diagnostics::expectedTokenInPreprocessorDirective, &pathToken)) @@ -1912,9 +2825,9 @@ static void HandleIncludeDirective(PreprocessorDirectiveContext* context) auto directiveLoc = GetDirectiveLoc(context); - PathInfo includedFromPathInfo = context->preprocessor->getSourceManager()->getPathInfo(directiveLoc, SourceLocType::Actual); + PathInfo includedFromPathInfo = context->m_preprocessor->getSourceManager()->getPathInfo(directiveLoc, SourceLocType::Actual); - IncludeSystem* includeSystem = context->preprocessor->includeSystem; + IncludeSystem* includeSystem = context->m_preprocessor->includeSystem; if (!includeSystem) { GetSink(context)->diagnose(pathToken.loc, Diagnostics::includeFailed, path); @@ -1943,7 +2856,7 @@ static void HandleIncludeDirective(PreprocessorDirectiveContext* context) expectEndOfDirective(context); // Check whether we've previously included this file and seen a `#pragma once` directive - if(context->preprocessor->pragmaOnceUniqueIdentities.Contains(filePathInfo.uniqueIdentity)) + if(context->m_preprocessor->pragmaOnceUniqueIdentities.Contains(filePathInfo.uniqueIdentity)) { return; } @@ -1953,7 +2866,7 @@ static void HandleIncludeDirective(PreprocessorDirectiveContext* context) // Push the new file onto our stack of input streams // TODO(tfoley): check if we have made our include stack too deep - auto sourceManager = context->preprocessor->getSourceManager(); + auto sourceManager = context->m_preprocessor->getSourceManager(); // See if this an already loaded source file SourceFile* sourceFile = sourceManager->findSourceFileRecursively(filePathInfo.uniqueIdentity); @@ -1975,9 +2888,124 @@ static void HandleIncludeDirective(PreprocessorDirectiveContext* context) // This is a new parse (even if it's a pre-existing source file), so create a new SourceView SourceView* sourceView 
= sourceManager->createSourceView(sourceFile, &filePathInfo, directiveLoc); - PreprocessorInputStream* inputStream = CreateInputStreamForSource(context->preprocessor, sourceView); - inputStream->parent = context->preprocessor->inputStream; - context->preprocessor->inputStream = inputStream; + InputFile* inputFile = new InputFile(context->m_preprocessor, sourceView); + + context->m_preprocessor->pushInputFile(inputFile); +} + +static void _parseMacroOps( + Preprocessor* preprocessor, + MacroDefinition* macro, + Dictionary<Name*, Index> const& mapParamNameToIndex) +{ + // Scan through the tokens to recognize the "ops" that make up + // the macro body. + // + Index spanBeginIndex = 0; + Index cursor = 0; + for(;;) + { + Index spanEndIndex = cursor; + Index tokenIndex = cursor++; + Token const& token = macro->tokens.m_tokens[tokenIndex]; + MacroDefinition::Op newOp; + switch(token.type) + { + default: + // Most tokens just continue our current span. + continue; + + case TokenType::Identifier: + { + auto paramName = token.getName(); + Index paramIndex = -1; + if(!mapParamNameToIndex.TryGetValue(paramName, paramIndex)) + { + continue; + } + + newOp.opcode = MacroDefinition::Opcode::ExpandedParam; + newOp.index0 = tokenIndex; + newOp.index1 = paramIndex; + } + break; + + case TokenType::Pound: + { + auto paramNameTokenIndex = cursor; + auto paramNameToken = macro->tokens.m_tokens[paramNameTokenIndex]; + if(paramNameToken.type != TokenType::Identifier) + { + GetSink(preprocessor)->diagnose(token.loc, Diagnostics::expectedMacroParameterAfterStringize); + continue; + } + auto paramName = paramNameToken.getName(); + Index paramIndex = -1; + if(!mapParamNameToIndex.TryGetValue(paramName, paramIndex)) + { + GetSink(preprocessor)->diagnose(token.loc, Diagnostics::expectedMacroParameterAfterStringize); + continue; + } + + cursor++; + + newOp.opcode = MacroDefinition::Opcode::StringizedParam; + newOp.index0 = tokenIndex; + newOp.index1 = paramIndex; + } + break; + + case 
TokenType::PoundPound: + if(macro->ops.getCount() == 0 && (spanBeginIndex == spanEndIndex)) + { + GetSink(preprocessor)->diagnose(token.loc, Diagnostics::tokenPasteAtStart); + continue; + } + + if(macro->tokens.m_tokens[cursor].type == TokenType::EndOfFile) + { + GetSink(preprocessor)->diagnose(token.loc, Diagnostics::tokenPasteAtEnd); + continue; + } + + newOp.opcode = MacroDefinition::Opcode::TokenPaste; + newOp.index0 = tokenIndex; + newOp.index1 = 0; + + // Okay, we need to do something here! + + break; + + case TokenType::EndOfFile: + break; + } + + if(spanBeginIndex != spanEndIndex + || ((token.type == TokenType::EndOfFile) && (macro->ops.getCount() == 0))) + { + MacroDefinition::Op spanOp; + spanOp.opcode = MacroDefinition::Opcode::RawSpan; + spanOp.index0 = spanBeginIndex; + spanOp.index1 = spanEndIndex; + macro->ops.add(spanOp); + } + if(token.type == TokenType::EndOfFile) + break; + + macro->ops.add(newOp); + spanBeginIndex = cursor; + } + + Index opCount = macro->ops.getCount(); + SLANG_ASSERT(opCount != 0); + for(Index i = 1; i < opCount-1; ++i) + { + if(macro->ops[i].opcode == MacroDefinition::Opcode::TokenPaste) + { + if(macro->ops[i-1].opcode == MacroDefinition::Opcode::ExpandedParam) macro->ops[i-1].opcode = MacroDefinition::Opcode::UnexpandedParam; + if(macro->ops[i+1].opcode == MacroDefinition::Opcode::ExpandedParam) macro->ops[i+1].opcode = MacroDefinition::Opcode::UnexpandedParam; + } + } } // Handle a `#define` directive @@ -1988,15 +3016,12 @@ static void HandleDefineDirective(PreprocessorDirectiveContext* context) return; Name* name = nameToken.getName(); - PreprocessorMacro* macro = CreateMacro(context->preprocessor); - macro->nameAndLoc = NameLoc(nameToken); - - PreprocessorMacro* oldMacro = LookupMacro(&context->preprocessor->globalEnv, name); + MacroDefinition* oldMacro = LookupMacro(&context->m_preprocessor->globalEnv, name); if (oldMacro) { auto sink = GetSink(context); - if (isBuiltinMacro(oldMacro->flavor)) + if 
(oldMacro->isBuiltin()) { sink->diagnose(nameToken.loc, Diagnostics::builtinMacroRedefinition, name); } @@ -2006,69 +3031,102 @@ static void HandleDefineDirective(PreprocessorDirectiveContext* context) sink->diagnose(oldMacro->getLoc(), Diagnostics::seePreviousDefinitionOf, name); } - DestroyMacro(context->preprocessor, oldMacro); + delete oldMacro; } - context->preprocessor->globalEnv.macros[name] = macro; + + MacroDefinition* macro = new MacroDefinition(); + + Dictionary<Name*, Index> mapParamNameToIndex; // If macro name is immediately followed (with no space) by `(`, // then we have a function-like macro - if (PeekRawTokenType(context) == TokenType::LParent) + auto maybeOpenParen = PeekRawToken(context); + if (maybeOpenParen.type == TokenType::LParent && !(maybeOpenParen.flags & TokenFlag::AfterWhitespace)) { - if (!(PeekRawToken(context).flags & TokenFlag::AfterWhitespace)) - { - // This is a function-like macro, so we need to remember that - // and start capturing parameters - macro->flavor = PreprocessorMacroFlavor::FunctionLike; + // This is a function-like macro, so we need to remember that + // and start capturing parameters + macro->flavor = MacroDefinition::Flavor::FunctionLike; - AdvanceRawToken(context); + AdvanceRawToken(context); - // If there are any parameters, parse them - if (PeekRawTokenType(context) != TokenType::RParent) + // If there are any parameters, parse them + if (PeekRawTokenType(context) != TokenType::RParent) + { + for (;;) { - for (;;) - { - // TODO: handle elipsis (`...`) for varags + // TODO: handle elipsis (`...`) for varags - // A macro parameter name should be a raw identifier - Token paramToken; - if (!ExpectRaw(context, TokenType::Identifier, Diagnostics::expectedTokenInMacroParameters, ¶mToken)) - break; + // A macro parameter name should be a raw identifier + Token paramToken; + if (!ExpectRaw(context, TokenType::Identifier, Diagnostics::expectedTokenInMacroParameters, ¶mToken)) + break; - // TODO(tfoley): some validation 
on parameter name. - // Certain names (e.g., `defined` and `__VA_ARGS__` - // are not allowed to be used as macros or parameters). + // TODO(tfoley): some validation on parameter name. + // Certain names (e.g., `defined` and `__VA_ARGS__` + // are not allowed to be used as macros or parameters). - // Add the parameter to the macro being deifned - macro->params.add(paramToken); + // Add the parameter to the macro being deifned + auto paramIndex = macro->params.getCount(); + macro->params.add(paramToken); - // If we see `)` then we are done with arguments - if (PeekRawTokenType(context) == TokenType::RParent) - break; - - ExpectRaw(context, TokenType::Comma, Diagnostics::expectedTokenInMacroParameters); + auto paramName = paramToken.getName(); + if(mapParamNameToIndex.ContainsKey(paramName)) + { + GetSink(context)->diagnose(paramToken.loc, Diagnostics::duplicateMacroParameterName, name); } - } + else + { + mapParamNameToIndex[paramName] = paramIndex; + } + - ExpectRaw(context, TokenType::RParent, Diagnostics::expectedTokenInMacroParameters); + // If we see `)` then we are done with arguments + if (PeekRawTokenType(context) == TokenType::RParent) + break; + + ExpectRaw(context, TokenType::Comma, Diagnostics::expectedTokenInMacroParameters); + } } + + ExpectRaw(context, TokenType::RParent, Diagnostics::expectedTokenInMacroParameters); + + } + else + { + macro->flavor = MacroDefinition::Flavor::ObjectLike; } + auto nameLoc = NameLoc(nameToken); + macro->nameAndLoc = NameLoc(nameToken); + + context->m_preprocessor->globalEnv.macros[name] = macro; + // consume tokens until end-of-line for(;;) { - Token token = AdvanceRawToken(context); - if( token.type == TokenType::EndOfDirective ) + Token token = PeekRawToken(context); + switch(token.type) { - // Last token on line will be turned into a conceptual end-of-file - // token for the sub-stream that the macro expands into. 
+ default: + // In the ordinary case, we just add the token to the definition, + // and keep consuming more tokens. + AdvanceRawToken(context); + macro->tokens.add(token); + continue; + + case TokenType::EndOfFile: + case TokenType::NewLine: + // The end of the current line/file ends the directive, and serves + // as the end-of-file marker for the macro's definition as well. + // token.type = TokenType::EndOfFile; macro->tokens.add(token); break; } - - // In the ordinary case, we just add the token to the definition - macro->tokens.add(token); + break; } + + _parseMacroOps(context->m_preprocessor, macro, mapParamNameToIndex); } // Handle a `#undef` directive @@ -2079,14 +3137,14 @@ static void HandleUndefDirective(PreprocessorDirectiveContext* context) return; Name* name = nameToken.getName(); - PreprocessorEnvironment* env = &context->preprocessor->globalEnv; - PreprocessorMacro* macro = LookupMacro(env, name); + Environment* env = &context->m_preprocessor->globalEnv; + MacroDefinition* macro = LookupMacro(env, name); if (macro != NULL) { // name was defined, so remove it env->macros.Remove(name); - DestroyMacro(context->preprocessor, macro); + delete macro; } else { @@ -2095,96 +3153,137 @@ static void HandleUndefDirective(PreprocessorDirectiveContext* context) } } +static String _readDirectiveMessage(PreprocessorDirectiveContext* context) +{ + StringBuilder result; + + while(!IsEndOfLine(context)) + { + Token token = AdvanceRawToken(context); + if(token.flags & TokenFlag::AfterWhitespace) + { + if(result.getLength() != 0) + { + result.append(" "); + } + } + result.append(token.getContent()); + } + + return result; +} + // Handle a `#warning` directive static void HandleWarningDirective(PreprocessorDirectiveContext* context) { - // Consume the directive, and inform the lexer to process the remainder of the line as a custom message. 
- AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage); + _setLexerDiagnosticSuppression(getInputFile(context), true); + + // Consume the directive + AdvanceRawToken(context); + + // Read the message. + String message = _readDirectiveMessage(context); - // Read the message token. - Token messageToken; - Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken); + _setLexerDiagnosticSuppression(getInputFile(context), false); // Report the custom error. - GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, messageToken.getContent()); + GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedWarning, message); } // Handle a `#error` directive static void HandleErrorDirective(PreprocessorDirectiveContext* context) { - // Consume the directive, and inform the lexer to process the remainder of the line as a custom message. - AdvanceRawToken(context, kLexerFlag_ExpectDirectiveMessage); + _setLexerDiagnosticSuppression(getInputFile(context), true); + + // Consume the directive + AdvanceRawToken(context); + + // Read the message. + String message = _readDirectiveMessage(context); - // Read the message token. - Token messageToken; - Expect(context, TokenType::DirectiveMessage, Diagnostics::expectedTokenInPreprocessorDirective, &messageToken); + _setLexerDiagnosticSuppression(getInputFile(context), false); // Report the custom error. 
- GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, messageToken.getContent()); + GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::userDefinedError, message); +} + +static void _handleDefaultLineDirective(PreprocessorDirectiveContext* context) +{ + SourceLoc directiveLoc = GetDirectiveLoc(context); + auto inputStream = getInputFile(context); + auto sourceView = inputStream->getLexer()->m_sourceView; + sourceView->addDefaultLineDirective(directiveLoc); +} + +static void _diagnoseInvalidLineDirective(PreprocessorDirectiveContext* context) +{ + GetSink(context)->diagnose(PeekLoc(context), Diagnostics::expected2TokensInPreprocessorDirective, + TokenType::IntegerLiteral, + "default", + GetDirectiveName(context)); + context->m_parseError = true; } // Handle a `#line` directive static void HandleLineDirective(PreprocessorDirectiveContext* context) { - auto inputStream = context->preprocessor->inputStream; + auto inputStream = getInputFile(context); int line = 0; SourceLoc directiveLoc = GetDirectiveLoc(context); - // `#line <integer-literal> ...` - if (PeekTokenType(context) == TokenType::IntegerLiteral) + switch(PeekTokenType(context)) { + case TokenType::IntegerLiteral: line = StringToInt(AdvanceToken(context).getContent()); - } - // `#line` - // `#line default` - else if ( - PeekTokenType(context) == TokenType::EndOfDirective - || (PeekTokenType(context) == TokenType::Identifier - && PeekToken(context).getContent() == "default")) - { - AdvanceToken(context); + break; - // Stop overriding source locations. 
- auto sourceView = inputStream->primaryStream->lexer.m_sourceView; - sourceView->addDefaultLineDirective(directiveLoc); + case TokenType::EndOfFile: + case TokenType::NewLine: + // `#line` + _handleDefaultLineDirective(context); return; - } - else - { - GetSink(context)->diagnose(PeekLoc(context), Diagnostics::expected2TokensInPreprocessorDirective, - TokenType::IntegerLiteral, - "default", - GetDirectiveName(context)); - context->parseError = true; + + case TokenType::Identifier: + if (PeekToken(context).getContent() == "default") + { + AdvanceToken(context); + _handleDefaultLineDirective(context); + return; + } + /* else, fall through to: */ + default: + _diagnoseInvalidLineDirective(context); return; } - auto sourceManager = context->preprocessor->getSourceManager(); + auto sourceManager = context->m_preprocessor->getSourceManager(); String file; - if (PeekTokenType(context) == TokenType::EndOfDirective) + switch(PeekTokenType(context)) { + case TokenType::EndOfFile: + case TokenType::NewLine: file = sourceManager->getPathInfo(directiveLoc).foundPath; - } - else if (PeekTokenType(context) == TokenType::StringLiteral) - { + break; + + case TokenType::StringLiteral: file = getStringLiteralTokenValue(AdvanceToken(context)); - } - else if (PeekTokenType(context) == TokenType::IntegerLiteral) - { + break; + + case TokenType::IntegerLiteral: // Note(tfoley): GLSL allows the "source string" to be indicated by an integer // TODO(tfoley): Figure out a better way to handle this, if it matters file = AdvanceToken(context).getContent(); - } - else - { + break; + + default: Expect(context, TokenType::StringLiteral, Diagnostics::expectedTokenInPreprocessorDirective); return; } - auto sourceView = inputStream->primaryStream->lexer.m_sourceView; + auto sourceView = inputStream->getLexer()->m_sourceView; sourceView->addLineDirective(directiveLoc, file, line); } @@ -2209,7 +3308,7 @@ SLANG_PRAGMA_DIRECTIVE_CALLBACK(handlePragmaOnceDirective) // We are using the 'uniqueIdentity' 
as determined by the ISlangFileSystemEx interface to determine file identities. auto directiveLoc = GetDirectiveLoc(context); - auto issuedFromPathInfo = context->preprocessor->getSourceManager()->getPathInfo(directiveLoc, SourceLocType::Actual); + auto issuedFromPathInfo = context->m_preprocessor->getSourceManager()->getPathInfo(directiveLoc, SourceLocType::Actual); // Must have uniqueIdentity for a #pragma once to work if (!issuedFromPathInfo.hasUniqueIdentity()) @@ -2218,7 +3317,7 @@ SLANG_PRAGMA_DIRECTIVE_CALLBACK(handlePragmaOnceDirective) return; } - context->preprocessor->pragmaOnceUniqueIdentities.Add(issuedFromPathInfo.uniqueIdentity); + context->m_preprocessor->pragmaOnceUniqueIdentities.Add(issuedFromPathInfo.uniqueIdentity); } // Information about a specific `#pragma` directive @@ -2361,17 +3460,23 @@ static PreprocessorDirective const* FindDirective(String const& name) static void HandleDirective(PreprocessorDirectiveContext* context) { // Try to read the directive name. - context->directiveToken = PeekRawToken(context); + context->m_directiveToken = PeekRawToken(context); TokenType directiveTokenType = GetDirective(context).type; // An empty directive is allowed, and ignored. - if (directiveTokenType == TokenType::EndOfDirective) + switch( directiveTokenType ) { + case TokenType::EndOfFile: + case TokenType::NewLine: return; + + default: + break; } + // Otherwise the directive name had better be an identifier - else if (directiveTokenType != TokenType::Identifier) + if (directiveTokenType != TokenType::Identifier) { GetSink(context)->diagnose(GetDirectiveLoc(context), Diagnostics::expectedPreprocessorDirectiveName); SkipToEndOfLine(context); @@ -2383,7 +3488,7 @@ static void HandleDirective(PreprocessorDirectiveContext* context) // If we are skipping disabled code, and the directive is not one // of the small number that need to run even in that case, skip it. 
- if (IsSkipping(context) && !(directive->flags & PreprocessorDirectiveFlag::ProcessWhenSkipping)) + if (isSkipping(context) && !(directive->flags & PreprocessorDirectiveFlag::ProcessWhenSkipping)) { SkipToEndOfLine(context); return; @@ -2403,39 +3508,78 @@ static void HandleDirective(PreprocessorDirectiveContext* context) expectEndOfDirective(context); } +void Preprocessor::popInputFile() +{ + auto inputFile = m_currentInputFile; + SLANG_ASSERT(inputFile); + + // We expect the file to be at its end, so that the + // next token read would be an end-of-file token. + // + auto expansionStream = inputFile->getExpansionStream(); + Token eofToken = expansionStream->peekRawToken(); + SLANG_ASSERT(eofToken.type == TokenType::EndOfFile); + + // If there are any open preprocessor conditionals in the file, then + // we need to diagnose them as an error, because they were not closed + // at the end of the file. + // + for(auto conditional = inputFile->getInnerMostConditional(); conditional; conditional = conditional->parent) + { + GetSink(this)->diagnose(eofToken, Diagnostics::endOfFileInPreprocessorConditional); + GetSink(this)->diagnose(conditional->ifToken, Diagnostics::seeDirective, conditional->ifToken.getContent()); + } + + // We will update the current file to the parent of whatever + // the `inputFile` was (usually the file that `#include`d it). + // + auto parentFile = inputFile->m_parent; + m_currentInputFile = parentFile; + + // As a subtle special case, if this is the *last* file to be popped, + // then we will update the canonical EOF token used by the preprocessor + // to be the EOF token for `inputFile`, so that the source location + // information returned will be accurate. + // + if(!parentFile) + { + endOfFileToken = eofToken; + } + + delete inputFile; +} + // Read one token using the full preprocessor, with all its behaviors. 
static Token ReadToken(Preprocessor* preprocessor) { for (;;) { - // Depending on what the lookahead token is, we - // might need to start expanding it. - // - // Note: doing this at the start of this loop - // is important, in case a macro has an empty - // expansion, and we end up looking at a different - // token after applying the expansion. - if(!IsSkipping(preprocessor)) - { - MaybeBeginMacroExpansion(preprocessor); - } + auto inputFile = preprocessor->m_currentInputFile; + if(!inputFile) + return preprocessor->endOfFileToken; + + auto expansionStream = inputFile->getExpansionStream(); // Look at the next raw token in the input. - Token const& token = PeekRawToken(preprocessor); + Token token = expansionStream->peekRawToken(); if (token.type == TokenType::EndOfFile) - return token; + { + preprocessor->popInputFile(); + continue; + } // If we have a directive (`#` at start of line) then handle it if ((token.type == TokenType::Pound) && (token.flags & TokenFlag::AtStartOfLine)) { // Skip the `#` - AdvanceRawToken(preprocessor); + expansionStream->readRawToken(); // Create a context for parsing the directive PreprocessorDirectiveContext directiveContext; - directiveContext.preprocessor = preprocessor; - directiveContext.parseError = false; - directiveContext.haveDoneEndOfDirectiveChecks = false; + directiveContext.m_preprocessor = preprocessor; + directiveContext.m_parseError = false; + directiveContext.m_haveDoneEndOfDirectiveChecks = false; + directiveContext.m_inputFile = inputFile; // Parse and handle the directive HandleDirective(&directiveContext); @@ -2443,59 +3587,34 @@ static Token ReadToken(Preprocessor* preprocessor) } // otherwise, if we are currently in a skipping mode, then skip tokens - if (IsSkipping(preprocessor)) + if (inputFile->isSkipping()) { - AdvanceRawToken(preprocessor); + expansionStream->readRawToken(); continue; } - // otherwise read a token, which may involve macro expansion - return AdvanceToken(preprocessor); - } -} + token = 
expansionStream->peekToken(); + if (token.type == TokenType::EndOfFile) + { + preprocessor->popInputFile(); + continue; + } -// intialize a preprocessor context, using the given sink for errros -static void InitializePreprocessor( - Preprocessor* preprocessor, - DiagnosticSink* sink) -{ - preprocessor->sink = sink; - preprocessor->includeSystem = NULL; - preprocessor->endOfFileToken.type = TokenType::EndOfFile; - preprocessor->endOfFileToken.flags = TokenFlag::AtStartOfLine; + expansionStream->readToken(); + return token; + } } // clean up after an environment -PreprocessorEnvironment::~PreprocessorEnvironment() +Environment::~Environment() { for (auto pair : this->macros) { - DestroyMacro(NULL, pair.Value); + auto macro = pair.Value; + delete macro; } } -// finalize a preprocessor and free any memory still in use -static void FinalizePreprocessor( - Preprocessor* preprocessor) -{ - // Clear out any waiting input streams - PreprocessorInputStream* input = preprocessor->inputStream; - while (input) - { - PreprocessorInputStream* parent = input->parent; - EndInputStream(preprocessor, input); - input = parent; - } - -#if 0 - // clean up any macros that were allocated - for (auto pair : preprocessor->globalEnv.macros) - { - DestroyMacro(preprocessor, pair.Value); - } -#endif -} - // Add a simple macro definition from a string (e.g., for a // `-D` option passed on the command line static void DefineMacro( @@ -2505,7 +3624,8 @@ static void DefineMacro( { PathInfo pathInfo = PathInfo::makeCommandLine(); - PreprocessorMacro* macro = CreateMacro(preprocessor); + MacroDefinition* macro = new MacroDefinition(); + macro->flavor = MacroDefinition::Flavor::ObjectLike; auto sourceManager = preprocessor->getSourceManager(); @@ -2520,17 +3640,20 @@ static void DefineMacro( // Use existing `Lexer` to generate a token stream. 
Lexer lexer; lexer.initialize(valueView, GetSink(preprocessor), preprocessor->getNamePool(), sourceManager->getMemoryArena()); - macro->tokens = lexer.lexAllTokens(); + macro->tokens = lexer.lexAllSemanticTokens(); + + Dictionary<Name*, Index> mapParamNameToIndex; + _parseMacroOps(preprocessor, macro, mapParamNameToIndex); Name* keyName = preprocessor->getNamePool()->getName(key); macro->nameAndLoc.name = keyName; macro->nameAndLoc.loc = keyView->getRange().begin; - PreprocessorMacro* oldMacro = NULL; + MacroDefinition* oldMacro = NULL; if (preprocessor->globalEnv.macros.TryGetValue(keyName, oldMacro)) { - DestroyMacro(preprocessor, oldMacro); + delete oldMacro; } preprocessor->globalEnv.macros[keyName] = macro; @@ -2545,16 +3668,30 @@ static TokenList ReadAllTokens( { Token token = ReadToken(preprocessor); - tokens.add(token); + switch(token.type) + { + default: + tokens.add(token); + break; - // Note: we include the EOF token in the list, - // since that is expected by the `TokenList` type. - if (token.type == TokenType::EndOfFile) + case TokenType::EndOfFile: + // Note: we include the EOF token in the list, + // since that is expected by the `TokenList` type. 
+ tokens.add(token); + return tokens; + + case TokenType::WhiteSpace: + case TokenType::NewLine: + case TokenType::LineComment: + case TokenType::BlockComment: + case TokenType::Invalid: break; + } } - return tokens; } +} // namespace preprocessor + /// Try to look up a macro with the given `macroName` and produce its value as a string Result findMacroValue( Preprocessor* preprocessor, @@ -2562,23 +3699,22 @@ Result findMacroValue( String& outValue, SourceLoc& outLoc) { + using namespace preprocessor; + auto namePool = preprocessor->namePool; - auto macro = LookupMacro(preprocessor, namePool->getName(macroName)); + auto macro = LookupMacro(&preprocessor->globalEnv, namePool->getName(macroName)); if(!macro) return SLANG_FAIL; - if(macro->flavor != PreprocessorMacroFlavor::ObjectLike) + if(macro->flavor != MacroDefinition::Flavor::ObjectLike) return SLANG_FAIL; - MacroExpansion* expansion = new MacroExpansion(); - initializeMacroExpansion(preprocessor, expansion, macro); - - // Don't set macro expansion location - pushMacroExpansion(preprocessor, expansion, SourceLoc()); + MacroInvocation* invocation = new MacroInvocation(preprocessor, macro, nullptr, SourceLoc(), SourceLoc()); + invocation->prime(); String value; for(bool first = true;;first = false) { - Token token = ReadToken(preprocessor); + Token token = invocation->readToken(); if(token.type == TokenType::EndOfFile) break; @@ -2587,21 +3723,13 @@ Result findMacroValue( value.append(token.getContent()); } + delete invocation; + outValue = value; outLoc = macro->getLoc(); return SLANG_OK; } -void PreprocessorHandler::handleEndOfFile(Preprocessor* preprocessor) -{ - SLANG_UNUSED(preprocessor); -} - -void PreprocessorHandler::handleFileDependency(String const& path) -{ - SLANG_UNUSED(path); -} - TokenList preprocessSource( SourceFile* file, DiagnosticSink* sink, @@ -2629,28 +3757,36 @@ TokenList preprocessSource( SourceFile* file, PreprocessorDesc const& desc) { + using namespace preprocessor; + Preprocessor 
preprocessor; - InitializePreprocessor(&preprocessor, desc.sink); preprocessor.sink = desc.sink; preprocessor.includeSystem = desc.includeSystem; preprocessor.fileSystem = desc.fileSystem; preprocessor.namePool = desc.namePool; + preprocessor.endOfFileToken.type = TokenType::EndOfFile; + preprocessor.endOfFileToken.flags = TokenFlag::AtStartOfLine; + // Add builtin macros { auto namePool = desc.namePool; const char*const builtinNames[] = { "__FILE__", "__LINE__" }; - const PreprocessorMacroFlavor builtinFlavors[] = { PreprocessorMacroFlavor::BuiltinFile, PreprocessorMacroFlavor::BuiltinLine }; + const MacroDefinition::Opcode builtinOpcodes[] = { MacroDefinition::Opcode::BuiltinFile, MacroDefinition::Opcode::BuiltinLine }; for (Index i = 0; i < SLANG_COUNT_OF(builtinNames); i++) { auto name = namePool->getName(builtinNames[i]); - PreprocessorMacro* macro = CreateMacro(&preprocessor); - macro->flavor = builtinFlavors[i]; + MacroDefinition::Op op; + op.opcode = builtinOpcodes[i]; + + MacroDefinition* macro = new MacroDefinition(); + macro->flavor = MacroDefinition::Flavor::BuiltinObjectLike; macro->nameAndLoc = NameLoc(name); + macro->ops.add(op); preprocessor.globalEnv.macros[name] = macro; } @@ -2670,22 +3806,23 @@ TokenList preprocessSource( } } - // This is the originating source we are compiling - there is no 'initiating' source loc, - // so pass SourceLoc(0) - meaning it has no initiating location. - SourceView* sourceView = sourceManager->createSourceView(file, nullptr, SourceLoc::fromRaw(0)); + { + // This is the originating source we are compiling - there is no 'initiating' source loc, + // so pass SourceLoc(0) - meaning it has no initiating location. 
+ SourceView* sourceView = sourceManager->createSourceView(file, nullptr, SourceLoc::fromRaw(0)); - // create an initial input stream based on the provided buffer - preprocessor.inputStream = CreateInputStreamForSource(&preprocessor, sourceView); + // create an initial input stream based on the provided buffer + InputFile* primaryInputFile = new InputFile(&preprocessor, sourceView); + preprocessor.pushInputFile(primaryInputFile); + } TokenList tokens = ReadAllTokens(&preprocessor); if(handler) { - handler->handleEndOfFile(&preprocessor); + handler->handleEndOfTranslationUnit(&preprocessor); } - FinalizePreprocessor(&preprocessor); - // debugging: build the pre-processed source back together #if 0 StringBuilder sb; @@ -2709,4 +3846,4 @@ TokenList preprocessSource( return tokens; } -} +} // namespace Slang diff --git a/source/slang/slang-preprocessor.h b/source/slang/slang-preprocessor.h index 9f3940d29..5f66be405 100644 --- a/source/slang/slang-preprocessor.h +++ b/source/slang/slang-preprocessor.h @@ -12,7 +12,11 @@ namespace Slang { class DiagnosticSink; class Linkage; -struct Preprocessor; +namespace preprocessor +{ + struct Preprocessor; +} +using preprocessor::Preprocessor; /// A handler for callbacks invoked by the preprocessor. /// @@ -22,7 +26,7 @@ struct Preprocessor; /// struct PreprocessorHandler { - virtual void handleEndOfFile(Preprocessor* preprocessor); + virtual void handleEndOfTranslationUnit(Preprocessor* preprocessor); virtual void handleFileDependency(String const& path); }; diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index c36808199..d16770a1d 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -1375,7 +1375,7 @@ protected: // whether any macro values were set in a given source file // that are semantically relevant to other stages of compilation. 
// - void handleEndOfFile(Preprocessor* preprocessor) SLANG_OVERRIDE + void handleEndOfTranslationUnit(Preprocessor* preprocessor) SLANG_OVERRIDE { // We look at the preprocessor state after reading the entire // source file/string, in order to see if any macros have been diff --git a/tests/bugs/token-limit.slang b/tests/bugs/token-limit.slang index bab18575c..8f7046296 100644 --- a/tests/bugs/token-limit.slang +++ b/tests/bugs/token-limit.slang @@ -6,7 +6,8 @@ // Build up a 2048 byte name #define LONG_NAME abcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFG
HabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGHabcdefghABCDEFGH -#define CONCAT(A, B) A ## B +#define CONCAT2(A, B) A ## B +#define CONCAT(A, B) CONCAT2(A, B) //TEST_INPUT:ubuffer(data=[1 2 3 4], stride=4):name=inputBuffer RWStructuredBuffer<int> inputBuffer; diff --git a/tests/current-bugs/paste-non-expansion.slang b/tests/current-bugs/paste-non-expansion.slang deleted file mode 100644 index 8270db676..000000000 --- a/tests/current-bugs/paste-non-expansion.slang +++ /dev/null @@ -1,16 +0,0 @@ -//DIAGNOSTIC_TEST:SIMPLE:-E - -// NOTE! This test should *fail*, if preprocessor is working correctly! - -// This demonstrates the existance of a bug in Slang preprocessor macro expansion. Could be due to incorrect paste handling -// or perhaps the rules around parameter expansion. - -#define CONCAT2(x, y) x ## y -#define CONCAT(x, y) CONCAT2(x, y) - -#define SOMETHING someThing - -// Should be someThingElse -CONCAT(SOMETHING, Else) -// Should be SOMETHINGAnother, but Slang expands to produce someThingAnother -CONCAT2(SOMETHING, Another)
\ No newline at end of file diff --git a/tests/current-bugs/preproc-concat-5.slang b/tests/current-bugs/preproc-concat-5.slang deleted file mode 100644 index 2be554f89..000000000 --- a/tests/current-bugs/preproc-concat-5.slang +++ /dev/null @@ -1,13 +0,0 @@ -//DISABLE_DIAGNOSTIC_TEST:SIMPLE:-E - -// NOTE! This test should *fail*, if preprocessor is working correctly! - -// It should produce 'THING', as the original invocation should have disabled THING, -// but it actually ends up in an infinite loop. - -#define CONCAT(x, y) x ## y - -#define THING2 THING -#define THING CONCAT(THING, 2) - -THING diff --git a/tests/current-bugs/preproc-detail-1.slang.expected b/tests/current-bugs/preproc-detail-1.slang.expected deleted file mode 100644 index ba93983af..000000000 --- a/tests/current-bugs/preproc-detail-1.slang.expected +++ /dev/null @@ -1,7 +0,0 @@ -result code = 0 -standard error = { -tests/current-bugs/preproc-detail-1.slang(10): error 15501: wrong number of arguments to macro (expected 1, got 0) -} -standard output = { - -} diff --git a/tests/current-bugs/preproc-expand-1.slang b/tests/current-bugs/preproc-expand-1.slang deleted file mode 100644 index f95b432e2..000000000 --- a/tests/current-bugs/preproc-expand-1.slang +++ /dev/null @@ -1,16 +0,0 @@ -//DIAGNOSTIC_TEST:SIMPLE:-E - -// NOTE! This test should *fail*, if preprocessor is working correctly! 
- -// Should produce: Hi -// Slang produces: C ( Hi ) - -#define OPEN ( -#define CLOSE ) - -#define C(x) x - -#define B(x) x - -B(C OPEN Hi CLOSE) - diff --git a/tests/current-bugs/preproc-pound-pound-1.slang.expected b/tests/current-bugs/preproc-pound-pound-1.slang.expected deleted file mode 100644 index 80a64402a..000000000 --- a/tests/current-bugs/preproc-pound-pound-1.slang.expected +++ /dev/null @@ -1,6 +0,0 @@ -result code = 0 -standard error = { -} -standard output = { -Hello ## There ; -} diff --git a/tests/diagnostics/token-paste-location.slang b/tests/diagnostics/token-paste-location.slang index 4da66bab3..ded5892f7 100644 --- a/tests/diagnostics/token-paste-location.slang +++ b/tests/diagnostics/token-paste-location.slang @@ -1,8 +1,8 @@ //DIAGNOSTIC_TEST:SIMPLE: -#define SOME % -#define THING % +#define SOME + +#define THING + #define A SOME #define B THING diff --git a/tests/diagnostics/token-paste-location.slang.expected b/tests/diagnostics/token-paste-location.slang.expected index 987bff15e..4ebdf4f3a 100644 --- a/tests/diagnostics/token-paste-location.slang.expected +++ b/tests/diagnostics/token-paste-location.slang.expected @@ -1,9 +1,9 @@ result code = -1 standard error = { -token paste(1): error 20001: unexpected '%', expected identifier +token paste(1): error 20001: unexpected '++', expected identifier tests/diagnostics/token-paste-location.slang(10): note: see token pasted location -%% -^ +++ +^~ } standard output = { } diff --git a/tests/diagnostics/x-macro-line-continuation.slang.expected b/tests/diagnostics/x-macro-line-continuation.slang.expected index 6b910b701..306123ec5 100644 --- a/tests/diagnostics/x-macro-line-continuation.slang.expected +++ b/tests/diagnostics/x-macro-line-continuation.slang.expected @@ -1,8 +1,8 @@ result code = -1 standard error = { -tests/diagnostics/x-macro-line-continuation.slang(12): error 15501: wrong number of arguments to macro (expected 1, got 2) - M(5) \ - ^ 
+tests/diagnostics/x-macro-line-continuation.slang(11): error 15501: wrong number of arguments to macro (expected 1, got 2) + M(4, 4) \ + ^ } standard output = { } diff --git a/tests/parser/incomplete-member-decl.slang.expected b/tests/parser/incomplete-member-decl.slang.expected index ed5695977..d7df41f58 100644 --- a/tests/parser/incomplete-member-decl.slang.expected +++ b/tests/parser/incomplete-member-decl.slang.expected @@ -3,6 +3,7 @@ standard error = { tests/parser/incomplete-member-decl.slang(19): error 20001: unexpected identifier, expected '(' int MyType<X> inner; ^~~~~ +tests/parser/incomplete-member-decl.slang(20): error 20001: unexpected end of file, expected identifier } standard output = { } diff --git a/tests/preprocessor/error.slang.expected b/tests/preprocessor/error.slang.expected index 927819780..f191f7aaa 100644 --- a/tests/preprocessor/error.slang.expected +++ b/tests/preprocessor/error.slang.expected @@ -1,6 +1,6 @@ result code = -1 standard error = { -tests/preprocessor/error.slang(11): error 15900: #error: This isn't valid! +tests/preprocessor/error.slang(11): error 15900: #error: This isn't valid! #error This isn't valid! ^~~~~ } diff --git a/tests/preprocessor/paste-non-expansion.slang b/tests/preprocessor/paste-non-expansion.slang new file mode 100644 index 000000000..d3487aeef --- /dev/null +++ b/tests/preprocessor/paste-non-expansion.slang @@ -0,0 +1,13 @@ +//DIAGNOSTIC_TEST:SIMPLE:-E + +// This os a regression test for a bug in Slang preprocessor macro expansion. + +#define CONCAT2(x, y) x ## y +#define CONCAT(x, y) CONCAT2(x, y) + +#define SOMETHING someThing + +// Should be someThingElse +CONCAT(SOMETHING, Else) +// Should be SOMETHINGAnother, but old Slang expands to produce someThingAnother +CONCAT2(SOMETHING, Another)
\ No newline at end of file diff --git a/tests/current-bugs/paste-non-expansion.slang.expected b/tests/preprocessor/paste-non-expansion.slang.expected index 2fbb7bad3..b62416de7 100644 --- a/tests/current-bugs/paste-non-expansion.slang.expected +++ b/tests/preprocessor/paste-non-expansion.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -someThingElse someThingAnother +someThingElse SOMETHINGAnother } diff --git a/tests/current-bugs/preproc-concat-1.slang b/tests/preprocessor/preproc-concat-1.slang index d9f205457..44d6c1d61 100644 --- a/tests/current-bugs/preproc-concat-1.slang +++ b/tests/preprocessor/preproc-concat-1.slang @@ -1,11 +1,9 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - #define CONCAT(a, b) a ## b // Correct output AB; -// Slang output +// Old Slang output // ab; #define A a diff --git a/tests/current-bugs/preproc-concat-1.slang.expected b/tests/preprocessor/preproc-concat-1.slang.expected index 673d7bc4c..84e8c280f 100644 --- a/tests/current-bugs/preproc-concat-1.slang.expected +++ b/tests/preprocessor/preproc-concat-1.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -ab ; +AB ; } diff --git a/tests/current-bugs/preproc-concat-2.slang b/tests/preprocessor/preproc-concat-2.slang index f3c4d28ff..b965eeaa3 100644 --- a/tests/current-bugs/preproc-concat-2.slang +++ b/tests/preprocessor/preproc-concat-2.slang @@ -1,7 +1,5 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! 
- #define CONCAT(a, b) a ## b #define A a @@ -11,7 +9,7 @@ #define B2 B // Correct output: a A2B2 b; -// Slang output +// Old Slang output // a ab b ; CONCAT(A2 A2, B2 B2); diff --git a/tests/current-bugs/preproc-concat-2.slang.expected b/tests/preprocessor/preproc-concat-2.slang.expected index 4b30fc7c2..1badd0da0 100644 --- a/tests/current-bugs/preproc-concat-2.slang.expected +++ b/tests/preprocessor/preproc-concat-2.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -a ab b ; +a A2B2 b ; } diff --git a/tests/current-bugs/preproc-concat-3.slang b/tests/preprocessor/preproc-concat-3.slang index c4077f75d..7f1953a2d 100644 --- a/tests/current-bugs/preproc-concat-3.slang +++ b/tests/preprocessor/preproc-concat-3.slang @@ -1,7 +1,5 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - #define CONCAT(a, b) a ## b #define A a @@ -13,6 +11,6 @@ // Gives error (as trys to concat unexpanded input) // <source>:11:1: error: pasting formed ')CONCAT', an invalid preprocessing token // -// Slang output: aabb ; +// Old Slang output: aabb ; CONCAT(CONCAT(A2, A2), CONCAT(B2, B2)); diff --git a/tests/preprocessor/preproc-concat-3.slang.expected b/tests/preprocessor/preproc-concat-3.slang.expected new file mode 100644 index 000000000..b39a659ed --- /dev/null +++ b/tests/preprocessor/preproc-concat-3.slang.expected @@ -0,0 +1,9 @@ +result code = 0 +standard error = { +tests/preprocessor/preproc-concat-3.slang(5): warning 15503: toking pasting with '##' resulted in the invalid token ')CONCAT' +#define CONCAT(a, b) a ## b + ^~ +} +standard output = { +CONCAT ( a , a ) CONCAT ( b , b ) ; +} diff --git a/tests/current-bugs/preproc-concat-4.slang b/tests/preprocessor/preproc-concat-4.slang index 31ec16268..4d1e3425f 100644 --- a/tests/current-bugs/preproc-concat-4.slang +++ b/tests/preprocessor/preproc-concat-4.slang @@ -1,7 +1,5 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! 
This test should *fail*, if preprocessor is working correctly! - #define CONCAT(a, b) a ## b #define A a @@ -17,7 +15,7 @@ // CONCAT is disabled, A and B are expanded on next pass // A2 B2 are first and last tokens pre expansion args // -// Slang outputs +// Old Slang outputs // ab ab ab CONCAT(CONCAT(A, B) A2, B2 CONCAT(A, B)) diff --git a/tests/current-bugs/preproc-concat-4.slang.expected b/tests/preprocessor/preproc-concat-4.slang.expected index 82d7225b2..6d3b1ca59 100644 --- a/tests/current-bugs/preproc-concat-4.slang.expected +++ b/tests/preprocessor/preproc-concat-4.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -ab ab ab +CONCAT ( a , b ) A2B2 CONCAT ( a , b ) } diff --git a/tests/current-bugs/preproc-detail-2.slang.expected b/tests/preprocessor/preproc-concat-5.slang.expected index b48d7909f..a401f9c6c 100644 --- a/tests/current-bugs/preproc-detail-2.slang.expected +++ b/tests/preprocessor/preproc-concat-5.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -a b c +THING } diff --git a/tests/current-bugs/preproc-detail-1.slang b/tests/preprocessor/preproc-detail-1.slang index 4cff0d205..ed465c38c 100644 --- a/tests/current-bugs/preproc-detail-1.slang +++ b/tests/preprocessor/preproc-detail-1.slang @@ -1,9 +1,7 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - // If a macro can take a single parameter, it is valid to pass in 'nothing'. 
-// Slang outputs an error about the wrong amount of parameters +// Old Slang outputs an error about the wrong amount of parameters // Correct output: a b #define A(x) a x b diff --git a/tests/current-bugs/preproc-concat-3.slang.expected b/tests/preprocessor/preproc-detail-1.slang.expected index 8be308266..7ebaef377 100644 --- a/tests/current-bugs/preproc-concat-3.slang.expected +++ b/tests/preprocessor/preproc-detail-1.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -aabb ; +a b } diff --git a/tests/current-bugs/preproc-detail-2.slang b/tests/preprocessor/preproc-detail-2.slang index eb687db31..7cb0c4f7e 100644 --- a/tests/current-bugs/preproc-detail-2.slang +++ b/tests/preprocessor/preproc-detail-2.slang @@ -1,7 +1,5 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - // Macro parameters must have unique names #define A(x, x) a x b x c diff --git a/tests/preprocessor/preproc-detail-2.slang.expected b/tests/preprocessor/preproc-detail-2.slang.expected new file mode 100644 index 000000000..16468fc68 --- /dev/null +++ b/tests/preprocessor/preproc-detail-2.slang.expected @@ -0,0 +1,9 @@ +result code = 0 +standard error = { +tests/preprocessor/preproc-detail-2.slang(7): error 15408: redefinition of macro parameter 'A' +#define A(x, x) a x b x c + ^ +} +standard output = { +a b c +} diff --git a/tests/current-bugs/preproc-detail-3.slang b/tests/preprocessor/preproc-detail-3.slang index b0675204b..7d07af33b 100644 --- a/tests/current-bugs/preproc-detail-3.slang +++ b/tests/preprocessor/preproc-detail-3.slang @@ -1,7 +1,5 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - // Undefining a macro that is not defined within C/C++ is defined as *not* an error or a warning. 
// On checking with DXC/FXC they also have this behavior (ie they don't output anything) // It's arguable if Slang should match this behavior - at least it is a warning. diff --git a/tests/current-bugs/preproc-detail-3.slang.expected b/tests/preprocessor/preproc-detail-3.slang.expected index a44c60220..44f38e85f 100644 --- a/tests/current-bugs/preproc-detail-3.slang.expected +++ b/tests/preprocessor/preproc-detail-3.slang.expected @@ -1,6 +1,6 @@ result code = 0 standard error = { -tests/current-bugs/preproc-detail-3.slang(9): warning 15401: macro 'C' is not defined +tests/preprocessor/preproc-detail-3.slang(9): warning 15401: macro 'C' is not defined #undef C ^ } diff --git a/tests/current-bugs/preproc-expand-1.slang.expected b/tests/preprocessor/preproc-expand-1.slang.expected index d46fa1159..70aa3f352 100644 --- a/tests/current-bugs/preproc-expand-1.slang.expected +++ b/tests/preprocessor/preproc-expand-1.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -C ( Hi ) +Hi } diff --git a/tests/current-bugs/preproc-pound-pound-1.slang b/tests/preprocessor/preproc-pound-pound-1.slang index 7f369b861..3aa157bae 100644 --- a/tests/current-bugs/preproc-pound-pound-1.slang +++ b/tests/preprocessor/preproc-pound-pound-1.slang @@ -1,10 +1,8 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - // GCC: <source>:1:9: error: '##' cannot appear at either end of a macro expansion. 
// Clang: <source>:1:21: error: '##' cannot appear at start of macro expansion -// Slang outputs Hello ## There; +// Old Slang outputs Hello ## There; #define POUND_POUND ## Hello POUND_POUND There; diff --git a/tests/preprocessor/preproc-pound-pound-1.slang.expected b/tests/preprocessor/preproc-pound-pound-1.slang.expected new file mode 100644 index 000000000..92ba92f20 --- /dev/null +++ b/tests/preprocessor/preproc-pound-pound-1.slang.expected @@ -0,0 +1,9 @@ +result code = 0 +standard error = { +tests/preprocessor/preproc-pound-pound-1.slang(8): error 15405: '##' is not allowed at the start of a macro body +#define POUND_POUND ## + ^~ +} +standard output = { +Hello ## There ; +} diff --git a/tests/current-bugs/preproc-pound-pound-2.slang b/tests/preprocessor/preproc-pound-pound-2.slang index 44181261b..d8038c2d4 100644 --- a/tests/current-bugs/preproc-pound-pound-2.slang +++ b/tests/preprocessor/preproc-pound-pound-2.slang @@ -1,13 +1,11 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! - #define A a #define B b #define OBJ A ## B // Should output AB -// Slang outputs ab +// Old Slang outputs ab OBJ diff --git a/tests/current-bugs/preproc-pound-pound-2.slang.expected b/tests/preprocessor/preproc-pound-pound-2.slang.expected index a818a4683..cacad8abd 100644 --- a/tests/current-bugs/preproc-pound-pound-2.slang.expected +++ b/tests/preprocessor/preproc-pound-pound-2.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -ab +AB } diff --git a/tests/current-bugs/preproc-stringify-1.slang b/tests/preprocessor/preproc-stringify-1.slang index 03e8366b5..32bfb00cc 100644 --- a/tests/current-bugs/preproc-stringify-1.slang +++ b/tests/preprocessor/preproc-stringify-1.slang @@ -1,13 +1,11 @@ //DIAGNOSTIC_TEST:SIMPLE:-E -// NOTE! This test should *fail*, if preprocessor is working correctly! 
- #define A a #define B b // Correct output // "A B" -// Slang output +// Old Slang output // # a b ; #define STRINGIFY(x) #x diff --git a/tests/current-bugs/preproc-stringify-1.slang.expected b/tests/preprocessor/preproc-stringify-1.slang.expected index 53efce117..dac301880 100644 --- a/tests/current-bugs/preproc-stringify-1.slang.expected +++ b/tests/preprocessor/preproc-stringify-1.slang.expected @@ -2,5 +2,5 @@ result code = 0 standard error = { } standard output = { -# a b ; +"A B" ; } diff --git a/tests/preprocessor/warning.slang.expected b/tests/preprocessor/warning.slang.expected index 66b1e5f17..23efd1479 100644 --- a/tests/preprocessor/warning.slang.expected +++ b/tests/preprocessor/warning.slang.expected @@ -1,6 +1,6 @@ result code = 0 standard error = { -tests/preprocessor/warning.slang(9): warning 15901: #warning: You wouldn't like me when I'm angry... +tests/preprocessor/warning.slang(9): warning 15901: #warning: You wouldn't like me when I'm angry... #warning You wouldn't like me when I'm angry... ^~~~~~~ } diff --git a/tools/slang-cpp-extractor/parser.cpp b/tools/slang-cpp-extractor/parser.cpp index 9a2227b21..97c156f2f 100644 --- a/tools/slang-cpp-extractor/parser.cpp +++ b/tools/slang-cpp-extractor/parser.cpp @@ -1219,7 +1219,7 @@ SlangResult Parser::parse(SourceOrigin* sourceOrigin, const Options* options) m_currentScope = m_nodeTree->m_rootNode; lexer.initialize(sourceView, m_sink, m_nodeTree->m_namePool, manager->getMemoryArena()); - m_tokenList = lexer.lexAllTokens(); + m_tokenList = lexer.lexAllSemanticTokens(); // See if there were any errors if (m_sink->getErrorCount()) { @@ -1350,8 +1350,13 @@ SlangResult Parser::parse(SourceOrigin* sourceOrigin, const Options* options) { // We are just going to ignore all of these for now.... 
m_reader.advanceToken(); - while (m_reader.peekTokenType() != TokenType::EndOfDirective && m_reader.peekTokenType() != TokenType::EndOfFile) + for (;;) { + auto t = m_reader.peekToken(); + if (t.type == TokenType::EndOfFile || (t.flags & TokenFlag::AtStartOfLine)) + { + break; + } m_reader.advanceToken(); } break; |
