From 5c220292d6ac2674942bb5f1bb09fe1817151c11 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Tue, 7 Nov 2017 09:25:41 -0800 Subject: Fixes for name mangling/demangling The source of a lot of these changes is that our current strategy for dealing with "builtin" operations when emitting HLSL from the IR is to de-mangle the mangled name of an operation, and then emit HLSL code for a function call to an operation with that de-mangled name. This change introduces a few fixups for that work: - It adds support for parsing the mangled names of generics (specialized and unspecialized) - It adds logic for detecting when the operation being invoked is a member function - This is currently a bit ugly, since we compare the number of actual arguments we have in the IR against the number of parameters declared for the callee, and if they don't match we assume we have an extra `this` argument. On the mangling side, we add (hacky) support for mangling a function name when its types involve generic parameters, e.g.: ``` __generic T length(vector v); ``` In this case the mangled name of the function needs to include a mangling for the type `vector` which means it also needs to include a mangling for `N`. The reason I describe this support as "hacky" is because we really shouldn't be reproducing the names `T` or `N` in the mangled symbol name. By doing so we make it so that a user changing the name of a generic parameter would break (IR) binary compatibility with existing code that was separately compiled. I've included comments in the code about a better way to handle this, but it isn't a priorit right now since binary compatibility isn't something meaningful until we start emitting usable bytecode modules. --- source/slang/emit.cpp | 144 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 135 insertions(+), 9 deletions(-) (limited to 'source/slang/emit.cpp') diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index d95946204..7e4fca4e5 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -4677,7 +4677,7 @@ emitDeclImpl(decl, nullptr); if(c == '0') return 0; - int count = 0; + UInt count = 0; for(;;) { count = count*10 + c - '0'; @@ -4689,18 +4689,117 @@ emitDeclImpl(decl, nullptr); } } + void readGenericParam() + { + switch(peek()) + { + case 'T': + get(); + break; + + default: + SLANG_UNEXPECTED("bad name mangling"); + break; + } + } + + void readGenericParams() + { + expect("g"); + UInt paramCount = readCount(); + for(UInt pp = 0; pp < paramCount; pp++) + { + readGenericParam(); + } + } + + void readSimpleIntVal() + { + int c = peek(); + if(isDigit(c)) + { + get(); + } + else + { + readVal(); + } + } + + void readType() + { + int c = peek(); + switch(c) + { + case 'V': + case 'b': + case 'i': + case 'u': + case 'U': + case 'h': + case 'f': + case 'd': + get(); + break; + + case 'v': + get(); + readSimpleIntVal(); + readType(); + break; + + default: + // TODO: need to read a named type + // here... + break; + } + } + + void readVal() + { + // TODO: handle other cases here + readType(); + } + + void readGenericArg() + { + readVal(); + } + + void readGenericArgs() + { + expect("G"); + UInt argCount = readCount(); + for(UInt aa = 0; aa < argCount; aa++) + { + readGenericArg(); + } + } + UnownedStringSlice readSimpleName() { UnownedStringSlice result; for(;;) { int c = peek(); + + if(c == 'g') + { + readGenericParams(); + continue; + } + else if(c == 'G') + { + readGenericArgs(); + continue; + } + if(!isDigit((char)c)) return result; // Read the length part - int count = readCount(); - if(count > (end_ - cursor_)) + UInt count = readCount(); + if(count > UInt(end_ - cursor_)) { SLANG_UNEXPECTED("bad name mangling"); UNREACHABLE_RETURN(result); @@ -4710,6 +4809,12 @@ emitDeclImpl(decl, nullptr); cursor_ += count; } } + + UInt readParamCount() + { + expect("p"); + return readCount(); + } }; void emitIntrinsicCallExpr( @@ -4726,16 +4831,37 @@ emitDeclImpl(decl, nullptr); auto name = um.readSimpleName(); - // TODO: need to detect if name represents - // a member function, etc. + // The mangled function name currently records + // the number of explicit parameters, and thus + // doesn't include the implicit `this` parameter. + // We can compare the argument and parameter counts + // to figure out whether we have a member function call. + UInt paramCount = um.readParamCount(); + + // For a call with N arguments, the instruction will + // have N+1 operands. + UInt operandCount = inst->getArgCount(); + UInt argCount = operandCount - 1; + UInt operandIndex = 1; + + if(argCount != paramCount) + { + // Looks like a member function call + emit("("); + emitIROperand(ctx, inst->getArg(operandIndex)); + emit(")."); + + operandIndex++; + } emit(name); emit("("); - UInt argCount = inst->getArgCount(); - for( UInt aa = 1; aa < argCount; ++aa ) + bool first = true; + for(; operandIndex < operandCount; ++operandIndex ) { - if(aa != 1) emit(", "); - emitIROperand(ctx, inst->getArg(aa)); + if(!first) emit(", "); + emitIROperand(ctx, inst->getArg(operandIndex)); + first = false; } emit(")"); } -- cgit v1.2.3 From 97a1a95b6192599e038a26704756a914368f3d3a Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Tue, 7 Nov 2017 09:55:53 -0800 Subject: Try to fix up IR emit for subscript calls This code isn't especially useful right now since most of the important subscripts are still special-cased with `__intrinsic_op`, but the idea is that if we de-mangle an intrinsic operation's name and see it is called `operator[]` then we are probably calling a subscript, and should emit an appropriate expression. Aside: this change has pointed out to me that our current name mangling isn't properly handling non-alphanumeric characters, so we'll be in trouble as soon as we have non-intrinsic subscripts, operators, etc. --- source/slang/emit.cpp | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'source/slang/emit.cpp') diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index 7e4fca4e5..615fb47a0 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -4664,7 +4664,7 @@ emitDeclImpl(decl, nullptr); expect("_S"); } - int readCount() + UInt readCount() { int c = peek(); if(!isDigit((char)c)) @@ -4822,15 +4822,50 @@ emitDeclImpl(decl, nullptr); IRCall* inst, IRFunc* func) { - // TODO: we need to inspect the mangled name, - // and construct a suitable expression from it... + // For a call with N arguments, the instruction will + // have N+1 operands. We will start consuming operands + // starting at the index 1. + UInt operandCount = inst->getArgCount(); + UInt argCount = operandCount - 1; + UInt operandIndex = 1; - UnmangleContext um(func->mangledName); + // Our current strategy for dealing with intrinsic + // calls is to "un-mangle" the mangled name, in + // order to figure out what the user was originally + // calling. This is a bit messy, and there might + // be better strategies (including just stuffing + // a pointer to the original decl onto the callee). + UnmangleContext um(func->mangledName); um.startUnmangling(); + // We'll read through the qualified name of the + // symbol (e.g., `Texture2D.Sample`) and then + // only keep the last segment of the name (e.g., + // the `Sample` part). auto name = um.readSimpleName(); + // We will special-case some names here, that + // represent callable declarations that aren't + // ordinary functions, and thus may use different + // syntax. + if(name == "operator[]") + { + // The user is invoking a built-in subscript operator + emit("("); + emitIROperand(ctx, inst->getArg(operandIndex++)); + emit(")["); + emitIROperand(ctx, inst->getArg(operandIndex++)); + emit("]"); + + if(operandIndex < operandCount) + { + emit(" = "); + emitIROperand(ctx, inst->getArg(operandIndex++)); + } + return; + } + // The mangled function name currently records // the number of explicit parameters, and thus // doesn't include the implicit `this` parameter. @@ -4838,12 +4873,6 @@ emitDeclImpl(decl, nullptr); // to figure out whether we have a member function call. UInt paramCount = um.readParamCount(); - // For a call with N arguments, the instruction will - // have N+1 operands. - UInt operandCount = inst->getArgCount(); - UInt argCount = operandCount - 1; - UInt operandIndex = 1; - if(argCount != paramCount) { // Looks like a member function call -- cgit v1.2.3 From f4c4f63c0cfad93b1eacf9300eb8e06d2c78ccc9 Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Tue, 7 Nov 2017 10:02:10 -0800 Subject: Fix for emitting subscript calls in HLSL/GLSL The old approach was relying on an `__intrinsic_op` modifier to tell us we need to do something special with an `InvokeExpr`, but a previous change removed a bunch of those modifiers. Instead, we will now check for calls to subscript declarations as part of the normal flow of emitting *any* call, similar to what is done for constructor calls already. Eventually we should be able to eliminate the special case in the `__intrinsic_op` path, but I'm holding off on that because the AST emit logic can probably be cleaned up a *lot* once it doesn't have to be used for cross-compilation as well. --- source/slang/emit.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'source/slang/emit.cpp') diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index 615fb47a0..c67bb7b29 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -1614,6 +1614,32 @@ struct EmitVisitor } } + void emitSimpleSubscriptCallExpr( + RefPtr callExpr, + EOpInfo /*outerPrec*/) + { + auto funcExpr = callExpr->FunctionExpr; + + // We expect any subscript operation to be invoked as a member, + // so the function expression had better be in the correct form. + auto memberExpr = funcExpr.As(); + if(!memberExpr) + { + SLANG_UNEXPECTED("subscript needs base expression"); + } + + Emit("("); + EmitExpr(memberExpr->BaseExpression); + Emit(")["); + UInt argCount = callExpr->Arguments.Count(); + for (UInt aa = 0; aa < argCount; ++aa) + { + if (aa != 0) Emit(", "); + EmitExpr(callExpr->Arguments[aa]); + } + Emit("]"); + } + // Emit a call expression that doesn't involve any special cases, // just an expression of the form `f(a0, a1, ...)` void emitSimpleCallExpr( @@ -1632,6 +1658,18 @@ struct EmitVisitor emitSimpleConstructorCallExpr(callExpr, outerPrec); return; } + + if(auto acessorDeclRef = declRef.As()) + { + declRef = acessorDeclRef.GetParent(); + } + + if(auto subscriptDeclRef = declRef.As()) + { + emitSimpleSubscriptCallExpr(callExpr, outerPrec); + return; + } + } // Once we've ruled out constructor calls, we can move on -- cgit v1.2.3 From ccea5702442a7a8303e6735a038be86939c1ce7a Mon Sep 17 00:00:00 2001 From: Tim Foley Date: Tue, 7 Nov 2017 10:22:53 -0800 Subject: Emit pointer-type parameters as out params The IR encodes `out` and `in out` function parameters as pointer types, so the emit logic needs to handle it. We had code to handle translation of pointers types into `out` declarations for function *declarations* but weren't handling it for function *definitions*. This change unifies the logic so that it is shared by function definitions and decalrations. This change does *not* deal with the following issues that need to be addressed sometime soon-ish: - We currently always translate pointers into `out`, even if they should be `in out`. This is obviously wrong. - If/when we eventually have targets that support true pointers (e.g., CUDA, NVIDIA OpenGL, etc.) we'll need a way to tell the difference between an `in` pointer parameter, and an `out` parameter. Both of these issues are meant to be addressed by having a few special cases of pointer types, for the `out` and `in out` cases, and only translating those (not all pointers). We need to plumb those through the IR more completely, but I'm not dealing with that here. --- source/slang/emit.cpp | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'source/slang/emit.cpp') diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index c67bb7b29..617e25d71 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -5698,7 +5698,8 @@ emitDeclImpl(decl, nullptr); emit(", "); auto paramName = getIRName(pp); - emitIRType(ctx, pp->getType(), paramName); + auto paramType = pp->getType(); + emitIRParamType(ctx, paramType, paramName); emitIRSemantics(ctx, pp); } @@ -5724,6 +5725,33 @@ emitDeclImpl(decl, nullptr); } } + void emitIRParamType( + EmitContext* ctx, + Type* type, + String const& name) + { + // An `out` or `inout` parameter will have been + // encoded as a parameter of pointer type, so + // we need to decode that here. + // + if( auto ptrType = type->As() ) + { + // TODO: we need a way to distinguish `out` + // from `inout`. The easiest way to do + // that might be to have each be a distinct + // sub-case of `IRPtrType` - this would also + // ensure that they can be distinguished from + // real pointers when the user means to use + // them. + + emit("out "); + + type = ptrType->getValueType(); + } + + emitIRType(ctx, type, name); + } + void emitIRFuncDecl( EmitContext* ctx, IRFunc* func) @@ -5771,26 +5799,7 @@ emitDeclImpl(decl, nullptr); paramName.append(pp); auto paramType = funcType->getParamType(pp); - // An `out` or `inout` parameter will have been - // encoded as a parameter of pointer type, so - // we need to decode that here. - // - if( auto ptrType = paramType->As() ) - { - // TODO: we need a way to distinguish `out` - // from `inout`. The easiest way to do - // that might be to have each be a distinct - // sub-case of `IRPtrType` - this would also - // ensure that they can be distinguished from - // real pointers when the user means to use - // them. - - emit("out "); - - paramType = ptrType->getValueType(); - } - - emitIRType(ctx, paramType, paramName); + emitIRParamType(ctx, paramType, paramName); } emit(");\n"); } -- cgit v1.2.3