From b2febc7966c2533f756a6829656423f05b2f21e5 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Fri, 11 Aug 2017 12:34:58 -0700 Subject: Look up declaration keywords using ordinary scoping. The existing parser code was doing string-based matching on the lookahead token to figure out how to parse a declaration, e.g.: ``` if(lookAhead == "struct") { /* do struct thing */ } else if(lookAhead == "interface") { /* do interface thing */ } ... ``` That approach has some annoying down-sides: - It is slower than it needs to be - It is annoying to deal with cases where the available declaration keywords might differ by language - Most importantly, it is not possible for us to introduce "extended" keywords that the user can make use of, but which can be ignored by the user and treated as an ordinary identifier. That last part is important. Suppose the user wanted to have a local variable named `import`, but we also had a Slang extension that added an `import` keyword. Then a line of code like `import += 1` would lead to a failure because we'd try to parse an import declaration, even when it is obvious that the user meant their local variable. This would mean that Slang can't parse existing user code that might clash with syntax extensions. This issue is the reason why we currently have keywords like `__import`. A traditional solution in a compiler is to map keywords to distinct token codes as part of lexing, which eliminates the first concern (performance) because now we can dispatch with `switch`. It can also alleviate the second concern if we add/remove names from the string->code mapping based on language (the rest of the parsing logic doesn't have to know about keywords being added/removed). The solution we go for here is more aggressive. Instead of mapping keyword names to special token codes during lexing, we instead introduce logical "syntax declarations" into the AST, which are looked up using the ordinary scoping rules of the language. 
Depending on what code is imported into the scope where parsing is going on, different keywords may then be visible. This solves our last concern, since a user-defined variable that just happens to use the same name as a keyword is now allowed to shadow the imported declaration for syntax (this is akin to, e.g., Scheme, where there really aren't any "keywords"). This also opens the door to the possibility of eventually allowing users to define their own syntax (again, like Scheme). For now I'm only using this for the declaration keywords. With this change it should be pretty easy to also add statement keywords in the same fashion. --- source/slang/parser.cpp | 492 +++++++++++++++++++++++------------------------- 1 file changed, 238 insertions(+), 254 deletions(-) (limited to 'source/slang/parser.cpp') diff --git a/source/slang/parser.cpp b/source/slang/parser.cpp index cc6a0dd6f..dd379df0e 100644 --- a/source/slang/parser.cpp +++ b/source/slang/parser.cpp @@ -528,18 +528,15 @@ namespace Slang return ParseProgram(); } - RefPtr ParseTypeDef(Parser* parser) + RefPtr ParseTypeDef(Parser* parser) { RefPtr typeDefDecl = new TypeDefDecl(); - typeDefDecl->Position = parser->tokenReader.PeekLoc(); - - // Consume the `typedef` keyword - parser->ReadToken("typedef"); // TODO(tfoley): parse an actual declarator auto type = parser->ParseTypeExp(); auto nameToken = parser->ReadToken(TokenType::Identifier); + typeDefDecl->Position = nameToken.Position; typeDefDecl->Name = nameToken; typeDefDecl->type = type; @@ -871,13 +868,11 @@ namespace Slang return parser->tokenReader.PeekTokenType(); } - static RefPtr parseImportDecl( + static RefPtr parseImportDecl( Parser* parser) { parser->haveSeenAnyImportDecls = true; - parser->ReadToken("__import"); - auto decl = new ImportDecl(); decl->scope = parser->currentScope; @@ -1758,7 +1753,8 @@ namespace Slang } static RefPtr ParseHLSLBufferDecl( - Parser* parser) + Parser* parser, + String bufferWrapperTypeName) { // An HLSL declaration of a 
constant buffer like this: // @@ -1774,22 +1770,7 @@ namespace Slang // declaration is made to be "transparent" so that lookup // will see through it to the members inside. - // We first look at the declaration keywrod to determine - // the type of buffer to declare: - String bufferWrapperTypeName; - SourceLoc bufferWrapperTypeNamePos = parser->tokenReader.PeekLoc(); - if (AdvanceIf(parser, "cbuffer")) - { - bufferWrapperTypeName = "ConstantBuffer"; - } - else if (AdvanceIf(parser, "tbuffer")) - { - bufferWrapperTypeName = "TextureBuffer"; - } - else - { - Unexpected(parser); - } + auto bufferWrapperTypeNamePos = parser->tokenReader.PeekLoc(); // We are going to represent each buffer as a pair of declarations. // The first is a type declaration that holds all the members, while @@ -1872,7 +1853,19 @@ namespace Slang return bufferVarDecl; } - + + static RefPtr parseHLSLCBufferDecl( + Parser* parser) + { + return ParseHLSLBufferDecl(parser, "ConstantBuffer"); + } + + static RefPtr parseHLSLTBufferDecl( + Parser* parser) + { + return ParseHLSLBufferDecl(parser, "TextureBuffer"); + } + static void removeModifier( Modifiers& modifiers, RefPtr modifier) @@ -2098,13 +2091,13 @@ namespace Slang } } - static RefPtr ParseGenericDecl( + static RefPtr ParseGenericDecl( Parser* parser) { RefPtr decl = new GenericDecl(); parser->FillPosition(decl.Ptr()); parser->PushScope(decl.Ptr()); - parser->ReadToken("__generic"); + parser->ReadToken(TokenType::OpLess); parser->genericDepth++; while (!parser->LookAheadToken(TokenType::OpGreater)) @@ -2123,17 +2116,20 @@ namespace Slang // A generic decl hijacks the name of the declaration // it wraps, so that lookup can find it. 
- decl->Name = decl->inner->Name; + if (decl->inner) + { + decl->Name = decl->inner->Name; + decl->Position = decl->inner->Position; + } parser->PopScope(); return decl; } - static RefPtr ParseExtensionDecl(Parser* parser) + static RefPtr ParseExtensionDecl(Parser* parser) { RefPtr decl = new ExtensionDecl(); parser->FillPosition(decl.Ptr()); - parser->ReadToken("__extension"); decl->targetType = parser->ParseTypeExp(); parseAggTypeDeclBody(parser, decl.Ptr()); @@ -2159,11 +2155,10 @@ namespace Slang } } - static RefPtr parseInterfaceDecl(Parser* parser) + static RefPtr parseInterfaceDecl(Parser* parser) { RefPtr decl = new InterfaceDecl(); parser->FillPosition(decl.Ptr()); - parser->ReadToken("interface"); decl->Name = parser->ReadToken(TokenType::Identifier); parseOptionalInheritanceClause(parser, decl.Ptr()); @@ -2173,11 +2168,10 @@ namespace Slang return decl; } - static RefPtr ParseConstructorDecl(Parser* parser) + static RefPtr ParseConstructorDecl(Parser* parser) { RefPtr decl = new ConstructorDecl(); parser->FillPosition(decl.Ptr()); - parser->ReadToken("__init"); parseParameterList(parser, decl); @@ -2221,11 +2215,10 @@ namespace Slang return decl; } - static RefPtr ParseSubscriptDecl(Parser* parser) + static RefPtr ParseSubscriptDecl(Parser* parser) { RefPtr decl = new SubscriptDecl(); parser->FillPosition(decl.Ptr()); - parser->ReadToken("__subscript"); // TODO: the use of this name here is a bit magical... 
decl->Name.Content = "operator[]"; @@ -2257,13 +2250,10 @@ namespace Slang } // Parse a declaration of a new modifier keyword - static RefPtr parseModifierDecl(Parser* parser) + static RefPtr parseModifierDecl(Parser* parser) { RefPtr decl = new ModifierDecl(); - // read the `__modifier` keyword - parser->ReadToken(TokenType::Identifier); - parser->ReadToken(TokenType::LParent); decl->classNameToken = parser->ReadToken(TokenType::Identifier); parser->ReadToken(TokenType::RParent); @@ -2293,6 +2283,44 @@ namespace Slang } } + static Token advanceToken(Parser* parser) + { + return parser->ReadToken(); + } + + static Token peekToken(Parser* parser) + { + return parser->tokenReader.PeekToken(); + } + + static SyntaxDecl* tryLookUpSyntaxDecl( + Parser* parser, + String const& name) + { + // Let's look up the name and see what we find. + + auto lookupResult = LookUp( + parser->getSession(), + nullptr, // no semantics visitor available yet + name, + parser->currentScope); + + // If we didn't find anything, or the result was overloaded, + // then we aren't going to be able to extract a single decl. + if(!lookupResult.isValid() || lookupResult.isOverloaded()) + return nullptr; + + auto decl = lookupResult.item.declRef.getDecl(); + if( auto syntaxDecl = dynamic_cast(decl) ) + { + return syntaxDecl; + } + else + { + return nullptr; + } + } + static RefPtr ParseDeclWithModifiers( Parser* parser, ContainerDecl* containerDecl, @@ -2302,47 +2330,105 @@ namespace Slang auto loc = parser->tokenReader.PeekLoc(); - // TODO: actual dispatch! 
- if (parser->LookAheadToken("struct")) - decl = ParseDeclaratorDecl(parser, containerDecl); - else if (parser->LookAheadToken("class")) - decl = ParseDeclaratorDecl(parser, containerDecl); - else if (parser->LookAheadToken("typedef")) - decl = ParseTypeDef(parser); - else if (parser->LookAheadToken("cbuffer") || parser->LookAheadToken("tbuffer")) - decl = ParseHLSLBufferDecl(parser); - else if (parser->LookAheadToken("__generic")) - decl = ParseGenericDecl(parser); - else if (parser->LookAheadToken("__extension")) - decl = ParseExtensionDecl(parser); - else if (parser->LookAheadToken("__init")) - decl = ParseConstructorDecl(parser); - else if (parser->LookAheadToken("__subscript")) - decl = ParseSubscriptDecl(parser); - else if (parser->LookAheadToken("interface")) - decl = parseInterfaceDecl(parser); - else if(parser->LookAheadToken("__modifier")) - decl = parseModifierDecl(parser); - else if(parser->LookAheadToken("__import")) - decl = parseImportDecl(parser); - else if(parser->LookAheadToken(TokenType::PoundImport)) - decl = parsePoundImportDecl(parser); - else if (AdvanceIf(parser, TokenType::Semicolon)) + switch (peekTokenType(parser)) { - decl = new EmptyDecl(); - decl->Position = loc; - } - // GLSL requires that we be able to parse "block" declarations, - // which look superficially similar to declarator declarations - else if( parser->LookAheadToken(TokenType::Identifier) - && parser->LookAheadToken(TokenType::LBrace, 1) ) - { - decl = parseGLSLBlockDecl(parser, modifiers); - } - else - { - // Default case: just parse a declarator-based declaration + case TokenType::Identifier: + { + // A declaration that starts with an identifier might be: + // + // - A keyword-based declaration (e.g., `cbuffer ...`) + // - The begining of a type in a declarator-based declaration (e.g., `int ...`) + // - A GLSL block declaration (e.g., `uniform Foo { ... }`) + + // Let's deal with the GLSL block case first. This is something like: + // + // uniform Foo { ... 
}; + // + // The `uniform` keyword has already been parsed as a modifier, + // so the identifier we are looking at is `Foo`. If the token + // after that is `{`, we assume this is a block. + // + // Of course, we only want to allow this syntax when parsing GLSL... + if (parser->translationUnit->sourceLanguage == SourceLanguage::GLSL) + { + if( parser->LookAheadToken(TokenType::LBrace, 1) ) + { + decl = parseGLSLBlockDecl(parser, modifiers); + break; + } + } + + // We will look up the name that was given, and try to see + // if it names a syntactic keyword that will tell us how to parse + // things. + auto nameToken = peekToken(parser); + auto name = nameToken.Content; + auto syntaxDecl = tryLookUpSyntaxDecl(parser, name); + + // TODO: confirm that the syntax is for a declaration? + if (syntaxDecl && syntaxDecl->syntaxClass.isSubClassOf()) + { + // Consume the keyword token, so that the callback doesn't + // need to deal with it. + advanceToken(parser); + + auto parsedSyntax = syntaxDecl->parserCallback(parser); + if (parsedSyntax) + { + if (!parsedSyntax->Position.isValid()) + { + parsedSyntax->Position = nameToken.Position; + } + + auto parsedDecl = parsedSyntax->As(); + if (parsedDecl) + { + decl = parsedDecl; + } + else + { + // TODO: diagnose! + } + } + + } + else + { + // If the idenfier given doesn't name a declaration keyword, + // then we will try to parse things as a declarator decl. + + decl = ParseDeclaratorDecl(parser, containerDecl); + break; + } + + } + break; + + // It is valid in HLSL/GLSL to have an "empty" declaration + // that consists of just a semicolon. In particular, this + // gets used a lot in GLSL to attach custom semantics to + // shader input or output. 
+ // + case TokenType::Semicolon: + { + advanceToken(parser); + + decl = new EmptyDecl(); + decl->Position = loc; + } + break; + + // The preprocessor will generate a custom token to represent + // the site of a `#import` directive, so that we can catch + // it downstream in the parser, here. + case TokenType::PoundImport: + decl = parsePoundImportDecl(parser); + break; + + // If nothing else matched, we try to parse an "ordinary" declarator-based declaration + default: decl = ParseDeclaratorDecl(parser, containerDecl); + break; } if (decl) @@ -3669,180 +3755,6 @@ namespace Slang RefPtr Parser::ParseLeafExpression() { return parsePrefixExpr(this); - -#if 0 - RefPtr rs; - if (LookAheadToken(TokenType::OpInc) || - LookAheadToken(TokenType::OpDec) || - LookAheadToken(TokenType::OpNot) || - LookAheadToken(TokenType::OpBitNot) || - LookAheadToken(TokenType::OpSub)) - { - RefPtr unaryExpr = new PrefixExpr(); - FillPosition(unaryExpr.Ptr()); - unaryExpr->FunctionExpr = parseOperator(this); - unaryExpr->Arguments.Add(ParseLeafExpression()); - rs = unaryExpr; - return rs; - } - - if (LookAheadToken(TokenType::LParent)) - { - ReadToken(TokenType::LParent); - RefPtr expr; - if (peekTypeName(this) && LookAheadToken(TokenType::RParent, 1)) - { - RefPtr tcexpr = new TypeCastExpr(); - FillPosition(tcexpr.Ptr()); - tcexpr->TargetType = ParseTypeExp(); - ReadToken(TokenType::RParent); - tcexpr->Expression = ParseExpression(Precedence::Multiplicative); // Note(tfoley): need to double-check this - expr = tcexpr; - } - else - { - expr = ParseExpression(); - ReadToken(TokenType::RParent); - } - rs = expr; - } - else if( LookAheadToken(TokenType::LBrace) ) - { - RefPtr initExpr = new InitializerListExpr(); - FillPosition(initExpr.Ptr()); - - // Initializer list - ReadToken(TokenType::LBrace); - - List> exprs; - - for(;;) - { - if(AdvanceIfMatch(this, TokenType::RBrace)) - break; - - auto expr = ParseArgExpr(); - if( expr ) - { - initExpr->args.Add(expr); - } - - if(AdvanceIfMatch(this, 
TokenType::RBrace)) - break; - - ReadToken(TokenType::Comma); - } - rs = initExpr; - } - - else if (LookAheadToken(TokenType::IntegerLiteral) || - LookAheadToken(TokenType::FloatingPointLiteral)) - { - RefPtr constExpr = new ConstantExpr(); - auto token = tokenReader.AdvanceToken(); - FillPosition(constExpr.Ptr()); - if (token.type == TokenType::IntegerLiteral) - { - constExpr->ConstType = ConstantExpr::ConstantType::Int; - constExpr->IntValue = StringToInt(token.Content); - } - else if (token.type == TokenType::FloatingPointLiteral) - { - constExpr->ConstType = ConstantExpr::ConstantType::Float; - constExpr->FloatValue = (FloatingPointLiteralValue) StringToDouble(token.Content); - } - rs = constExpr; - } - else if (LookAheadToken("true") || LookAheadToken("false")) - { - RefPtr constExpr = new ConstantExpr(); - auto token = tokenReader.AdvanceToken(); - FillPosition(constExpr.Ptr()); - constExpr->ConstType = ConstantExpr::ConstantType::Bool; - constExpr->IntValue = token.Content == "true" ? 
1 : 0; - rs = constExpr; - } - else if (LookAheadToken(TokenType::Identifier)) - { - RefPtr varExpr = new VarExpr(); - varExpr->scope = currentScope.Ptr(); - FillPosition(varExpr.Ptr()); - auto token = ReadToken(TokenType::Identifier); - varExpr->name = token.Content; - rs = varExpr; - } - - while (!tokenReader.IsAtEnd() && - (LookAheadToken(TokenType::OpInc) || - LookAheadToken(TokenType::OpDec) || - LookAheadToken(TokenType::Dot) || - LookAheadToken(TokenType::LBracket) || - LookAheadToken(TokenType::LParent))) - { - if (LookAheadToken(TokenType::OpInc)) - { - RefPtr unaryExpr = new PostfixExpr(); - FillPosition(unaryExpr.Ptr()); - unaryExpr->FunctionExpr = parseOperator(this); - unaryExpr->Arguments.Add(rs); - rs = unaryExpr; - } - else if (LookAheadToken(TokenType::OpDec)) - { - RefPtr unaryExpr = new PostfixExpr(); - FillPosition(unaryExpr.Ptr()); - unaryExpr->FunctionExpr = parseOperator(this); - unaryExpr->Arguments.Add(rs); - rs = unaryExpr; - } - else if (LookAheadToken(TokenType::LBracket)) - { - RefPtr indexExpr = new IndexExpr(); - indexExpr->BaseExpression = rs; - FillPosition(indexExpr.Ptr()); - ReadToken(TokenType::LBracket); - indexExpr->IndexExpression = ParseExpression(); - ReadToken(TokenType::RBracket); - rs = indexExpr; - } - else if (LookAheadToken(TokenType::LParent)) - { - RefPtr invokeExpr = new InvokeExpr(); - invokeExpr->FunctionExpr = rs; - FillPosition(invokeExpr.Ptr()); - ReadToken(TokenType::LParent); - while (!tokenReader.IsAtEnd()) - { - if (!LookAheadToken(TokenType::RParent)) - invokeExpr->Arguments.Add(ParseArgExpr()); - else - { - break; - } - if (!LookAheadToken(TokenType::Comma)) - break; - ReadToken(TokenType::Comma); - } - ReadToken(TokenType::RParent); - rs = invokeExpr; - } - else if (LookAheadToken(TokenType::Dot)) - { - RefPtr memberExpr = new MemberExpr(); - memberExpr->scope = currentScope.Ptr(); - FillPosition(memberExpr.Ptr()); - memberExpr->BaseExpression = rs; - ReadToken(TokenType::Dot); - memberExpr->name = 
ReadToken(TokenType::Identifier).Content; - rs = memberExpr; - } - } - if (!rs) - { - sink->diagnose(tokenReader.PeekLoc(), Diagnostics::syntaxError); - } - return rs; -#endif } // Parse a source file into an existing translation unit @@ -3858,4 +3770,76 @@ namespace Slang return parser.parseSourceFile(translationUnit->SyntaxNode.Ptr()); } + + static void addBuiltinSyntaxImpl( + Session* /*session*/, + Scope* scope, + char const* nameText, + SyntaxParseCallback callback, + SyntaxClass syntaxClass) + { + String name(nameText); + + RefPtr syntaxDecl = new SyntaxDecl(); + syntaxDecl->Name.Content = name; + syntaxDecl->syntaxClass = syntaxClass; + syntaxDecl->parserCallback = callback; + + AddMember(scope, syntaxDecl); + } + + template + static void addBuiltinSyntax( + Session* session, + Scope* scope, + char const* name, + SyntaxParseCallback callback) + { + addBuiltinSyntaxImpl(session, scope, name, callback, getClass()); + } + + RefPtr populateBaseLanguageModule( + Session* session, + RefPtr scope) + { + RefPtr moduleDecl = new ModuleDecl(); + scope->containerDecl = moduleDecl; + + addBuiltinSyntax(session, scope, "typedef", &ParseTypeDef); + addBuiltinSyntax(session, scope, "cbuffer", &parseHLSLCBufferDecl); + addBuiltinSyntax(session, scope, "tbuffer", &parseHLSLTBufferDecl); + addBuiltinSyntax(session, scope, "__generic", &ParseGenericDecl); + addBuiltinSyntax(session, scope, "__extension", &ParseExtensionDecl); + addBuiltinSyntax(session, scope, "__init", &ParseConstructorDecl); + addBuiltinSyntax(session, scope, "__subscript", &ParseSubscriptDecl); + addBuiltinSyntax(session, scope, "interface", &parseInterfaceDecl); + addBuiltinSyntax(session, scope, "__modifier", &parseModifierDecl); + addBuiltinSyntax(session, scope, "__import", &parseImportDecl); + +#if 0 + // TODO: actual dispatch! 
+ if (parser->LookAheadToken("struct")) + decl = ParseDeclaratorDecl(parser, containerDecl); + else if (parser->LookAheadToken("class")) + decl = ParseDeclaratorDecl(parser, containerDecl); + else if (parser->LookAheadToken("__generic")) + decl = ParseGenericDecl(parser); + else if (parser->LookAheadToken("__extension")) + decl = ParseExtensionDecl(parser); + else if (parser->LookAheadToken("__init")) + decl = ParseConstructorDecl(parser); + else if (parser->LookAheadToken("__subscript")) + decl = ParseSubscriptDecl(parser); + else if (parser->LookAheadToken("interface")) + decl = parseInterfaceDecl(parser); + else if(parser->LookAheadToken("__modifier")) + decl = parseModifierDecl(parser); + else if(parser->LookAheadToken("__import")) + decl = parseImportDecl(parser); +#endif + + + return moduleDecl; + } + } -- cgit v1.2.3