diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-emit.cpp | 13 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-resources.cpp | 1023 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-resources.h | 5 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-ir.h | 12 |
5 files changed, 1054 insertions, 8 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 9ab42569a..09e5c9b2d 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -431,16 +431,21 @@ Result linkAndOptimizeIR( // // Many of our targets place restrictions on how certain // resource types can be used, so that having them as - // function parameters is invalid. To clean this up, - // we will try to specialize called functions based - // on the actual resources that are being passed to them - // at specific call sites. + // function parameters, results, etc. is invalid. + // To clean this up, we apply two kinds of specialization: + // + // * Specialize call sites based on the actual resources + // that a called function will return/output. + // + // * Specialize called functions based on the actual resources + // passed as input at specific call sites. // // Because the legalization may depend on what target // we are compiling for (certain things might be okay // for D3D targets that are not okay for Vulkan), we // pass down the target request along with the IR. // + specializeResourceOutputs(compileRequest, targetRequest, irModule); specializeResourceParameters(compileRequest, targetRequest, irModule); // For GLSL targets, we also want to specialize calls to functions that diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp index 8a917eee3..9a51351a9 100644 --- a/source/slang/slang-ir-specialize-resources.cpp +++ b/source/slang/slang-ir-specialize-resources.cpp @@ -5,6 +5,8 @@ #include "slang-ir.h" #include "slang-ir-insts.h" +#include "slang-ir-clone.h" + namespace Slang { @@ -38,6 +40,14 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio // because dxc apparently does not treat `ConstantBuffer<T>` // as a first-class type. // + // TODO: This should not apply to CPU or CUDA, where + // `ConstantBuffer<T>` is just `T*`.
Right now this + // optimization is not applying to those targets by + // coincidence (because the shader parameters are not + // globals, there is no way for the specialization to + // succeed), but eventually we should turn it off more + // carefully. + // if(as<IRUniformParameterGroupType>(type)) return true; @@ -82,4 +92,1015 @@ void specializeResourceParameters( specializeFunctionCalls(compileRequest, targetRequest, module, &condition); } -} // namesapce Slang + /// A pass to specialize resource-typed function outputs +struct ResourceOutputSpecializationPass +{ + // This pass is kind of a dual to `specializeResourceParameters()`. + // Whereas that pass identifies call sites that pass suitable argument + // values and specializes the callee functionfor each such call site, + // this pass identifies *functions* that *output* suitable values (either + // via `return` or `out`/`inout` parmeters), and then specializes the + // *call sites* for those functions based on the values that are output. + + BackEndCompileRequest* compileRequest; + TargetRequest* targetRequest; + IRModule* module; + + SharedIRBuilder sharedBuilder; + SharedIRBuilder* getSharedBuilder() { return &sharedBuilder; } + + void processModule() + { + // We start by setting up the shared IR building state. + // + sharedBuilder.module = module; + sharedBuilder.session = module->getSession(); + + // The main logic consists of iterating over all functions + // (which must appear at the global level) and specializing + // them if needed. + // + for( auto inst : module->getGlobalInsts() ) + { + auto func = as<IRFunc>(inst); + if(!func) + continue; + + processFunc(func); + } + } + + void processFunc(IRFunc* oldFunc) + { + // We don't want to waste any effort on functions that don't merit + // specialization, so the first step is to identify if the function + // has any outputs that use resource types. + // + // If there are no suitable outputs, then we bail out and skip + // the given function. 
+ // + if(!shouldSpecializeFunc(oldFunc)) + return; + + // It is possible that we have a function that we *should* specialize + // (based on its signature), but we *cannot* yet specialize it. + // + // Rather than try to detect that situation as a pre-process, we + // will instead take the simpler approach of trying to produce + // a specialized version of `oldFunc`, and bail out if we run + // into any problems. + // + // TODO: It is possible that the allocation we perform here could + // lead to performance issues if this pass gets iterated. Eventually + // we should probably merge the resource-based specialization logic + // into a combined pass that specializes in both directions and + // also folds in SSA formation to clean up temporaries. + + // We start the specialization process by making a clone of the + // original function. + // + IRBuilder builder(&sharedBuilder); + builder.setInsertBefore(oldFunc); + IRFunc* newFunc = builder.createFunc(); + newFunc->setFullType(oldFunc->getFullType()); + + IRCloneEnv cloneEnv; + cloneInstDecorationsAndChildren( + &cloneEnv, + &sharedBuilder, + oldFunc, + newFunc); + + // At first `newFunc` is a direct clone of `oldFunc`, and thus doesn't + // solve any of our problems. We will traverse `newFunc` and specialize + // it as needed, while also collecting information that will allow + // us to rewrite call sites. + // + FuncInfo funcInfo; + if( SLANG_FAILED(specializeFunc(newFunc, funcInfo)) ) + { + // Even though we determined that we *should* specialize + // this function, we were not able to because of some + // failure inside the body of the function. + // + // For now, we don't treat this as an error condition, + // because subsequent optimization could make it so + // that another attempt at this pass succeeds.
+ // + // TODO: We should iterate on this pass and the relevant + // simplifications, and keep attempting until we hit + // a steady state, before running this pass one + // last time with a flag that causes it to emit an + // error message on this falure path. + // + // TODO: Of course, we should *also* have front-end + // validation that ensures that functions that include + // targets with limited resource capabilities only use + // potentially-resource-bearing types in ways that we + // are sure we can optimize/simplify, so that the error + // messages can be front-end rather than back-end errors. + // + return; + } + + // Specialization might have changed the signature of `newFunc`, + // by adding/removing parameters, or by changing the result + // type it returns. + // + // There is a utility function called `fixUpFuncType` that can + // change the type of an IR function to match its parameter list, + // but we need to compute the desired result type manually. + // + // The result type defaults to the result type of the original + // function, but should be changed to `void` if specialization + // was applied to the function result. + // + IRType* newResultType = oldFunc->getResultType(); + if( funcInfo.result.flavor != OutputInfo::Flavor::None ) + newResultType = builder.getVoidType(); + fixUpFuncType(newFunc, newResultType); + + // At this point, we generated a `newFunc` that specialized `oldFunc`, + // and can be used instead of it at any direct call sites. + // + // We are going to replace those call sites, which will modify the + // use-def information for `oldFunc`, so we start by collecting the + // call sites into an array. + // + // Note: We are ignoring any uses that are not direct calls of `oldFunc`; + // alternative use sites might include references to the function from + // witness tables, etc. 
The expectation when using this pass is that + // any other uses of `oldFunc` will eventually be eliminated, so that + // only the specialized version remains. If uses of the unspecialized + // function remain, they could cause problems for downstream code generation. + // + // Targets that want to support true dynamic dispatch through witness + // tables or higher-order functions will need to either disallow resource-type + // returns from such functions, or support resource-type returns without + // the aid of this pass. + // + List<IRCall*> calls; + for( auto use = oldFunc->firstUse; use; use = use->nextUse ) + { + auto user = use->getUser(); + auto call = as<IRCall>(user); + if(!call) + continue; + if(call->getCallee() != oldFunc) + continue; + calls.add(call); + } + + // Once we have identified the calls to `oldFunc`, we will set about replacing + // them with calls to `newFunc`. + // + // Note: from this point on specialization is not allowed to fail; if the callee + // function could be specialized then all call sites to it must be specialized. + // There should be no conditions at call sites that can cause specialization to + // fail, because specialization does not depend on what is passed *in* to each + // call, but only on what gets passed *out*. + // + for( auto oldCall : calls ) + { + specializeCallSite(oldCall, newFunc, funcInfo); + } + } + + // With the overall flow of the pass described, we can now drill down + // to the subroutines and data structures that make the whole task possible. + // + // We start with the simple problm of deciding whether or not we should + // (attempt to) specialize a given function. + // + bool shouldSpecializeFunc(IRFunc* func) + { + // We cannot specialize a function if we do not have + // access to its definition. + // + if(!func->isDefinition()) + return false; + + // If any of the parameters of the function are `out` + // or `inout` parameters of a resource type, then we + // should specialize the function. 
+ // + for( auto param : func->getParams() ) + { + auto paramType = param->getDataType(); + auto outType = as<IROutTypeBase>(paramType); + if(!outType) + continue; + auto valueType = outType->getValueType(); + if(isResourceType(valueType)) + return true; + } + + // If the result type of the function is a resource type, + // then we should specialize the function. + // + if( isResourceType(func->getResultType()) ) + { + return true; + } + + // If the above checks do not trigger, then we don't + // need/want to specialize the function after all. + // + return false; + } + + // For the above function to work, we need to be able to identify + // the resource types (and arrays thereof) that require specialization. + // + // TODO: It seems like we should be able to share a central definition + // of resource-ness. + // + // Note: we do not worry about parameters/results that are structures + // with resource-type fields, under the assumption that resource + // legalization has already been applied, exposing all resource-type + // parameters as their own top-level parameters. + // + // TODO: resource legalization may not apply correctly to function + // results and `out`/`inout` parameters, in which case we need to + // fix that pass. + // + bool isResourceType(IRType* type) + { + type = unwrapArray(type); + + if(as<IRResourceTypeBase>(type)) + return true; + + if(as<IRUniformParameterGroupType>(type)) + return true; + + if(as<IRHLSLStructuredBufferTypeBase>(type)) + return true; + + if(as<IRByteAddressBufferTypeBase>(type)) + return true; + + // TODO: more cases here? + + return false; + } + + // Once we've decided that a function is worth specializing, + // we will both transform the function and collect information + // about its outputs. + // + // The central piece of the data structure we will use is + // `OutputInfo`, which will track information about one + // (possible) function output that might need specialization. 
+ + /// Information about a possible output of a function (return value or output parameter) + struct OutputInfo + { + enum class Flavor + { + None, ///< Not actually an output, or does not need specialization + + Undefined, ///< Needs specialization, but no suitable replacement value is known + + Replace, ///< A replacement value should be computed based on `representative` + }; + + /// What sort of output value is this? + Flavor flavor = Flavor::None; + + /// For an output value with the `Replace` flavor, the representative value to clone. + IRInst* representative = nullptr; + + /// The index of the first new output parameter introduced for this output + Index firstNewOutputParamIndex = 0; + + /// The number of new output parameters introduced for this output + Index newOutputParamCount = 0; + }; + + // The function result will be tracked as an `OutputInfo`, and + // we will define a subtype specific to that case, even though + // it does not currently need to track any additional data. + + /// A representation of the return-value output of a function + struct ReturnValueInfo : OutputInfo {}; + + // Parameters can be outputs, so they will also collect information + // into `OutputInfo`s, but they also need additional information + // related to the fact that parameters have corresponding arguments + // at call sites, and how we specialize the parameter affects + // what we need to do with those arguments. + + /// A representation of a parameter (possibly an output) of a function + struct ParamInfo : OutputInfo + { + /// Represents what to do with an existing argument at a call site. + enum class OldArgMode + { + Keep, ///< Keep the argument as-is. 
+ Ignore, ///< Ignore the argument (eliminate it from the call) + Deref, ///< Dereference the argument; it used to be `inout` and is now just `in` + }; + + /// What to do with existing arguments at call sites + OldArgMode oldArgMode = OldArgMode::Keep; + }; + + // It is possible that specializing a function output may require + // us to add new output parameters to the function, to enable + // the caller to compute the correct output resource. + // + // For example, consider this input: + // + // Texture2D getRandomTexture() + // { + // int index = /* complicated logic */; + // return gTextures[index]; + // } + // ... + // Texture2D t = getRandomTexture(); + // + // The desired output is: + // + // void _getRandomTexture(out int i) + // { + // int index = /* complicated logic */; + // i = index; + // } + // ... + // int i; + // _getRandomTexture(i); + // Texture2D t = gTextures[i]; + // + // In this case we have made the computation of `t` be + // valid for targets with limited resource support, but + // we have kept the complicated computation of `index` + // in a subroutine, so that we have not bloated the + // code more than necessary. + // + // In order to track new parameters like `i` above, + // we introduce the `NewOutputParamInfo` type. + + /// Represents a new output parameter introduced during specialization + struct NewOutputParamInfo + { + /// The type of the new parameter's *value* (not the pointer type for an `out` parameter) + IRType* type; + }; + + // Finally, we can aggregate the types above to represent the + // collected information about a function to be specialized. + + /// Information about a function to be specialized + struct FuncInfo + { + ReturnValueInfo result; + List<ParamInfo> oldParams; + List<NewOutputParamInfo> newOutputParams; + }; + + // We now turn to the code that fills in the `FuncInfo` structure.
+ + Result specializeFunc(IRFunc* func, FuncInfo& outFuncInfo) + { + // To specialize a function, we attempt to specialize + // all the applicable parameters and the function result. + // + // Any failures along the way cause the whole process to fail. + + for( auto param : func->getParams() ) + { + ParamInfo paramInfo; + SLANG_RETURN_ON_FAIL(maybeSpecializeParam(param, paramInfo, outFuncInfo)); + outFuncInfo.oldParams.add(paramInfo); + } + + SLANG_RETURN_ON_FAIL(maybeSpecializeResult(func, outFuncInfo.result, outFuncInfo)); + + return SLANG_OK; + } + + // The logic for specializing a function result (the return value) is + // simpler than that for parameters, so we will look at it first. + + Result maybeSpecializeResult(IRFunc* func, ReturnValueInfo& outResultInfo, FuncInfo& ioFuncInfo) + { + // If the result type of the function isn't a resource type, + // then we don't need to specialize the result, and we + // can succeed without doing anything. + // + if( !isResourceType(func->getResultType()) ) + return SLANG_OK; + + // Otherwise, we know that we will need to produce specialization + // information in `outResultInfo` or fail in the attempt. + // + // We start with the `prepareOutputValue` subroutine which will + // handle some common logic shared with the parameter case. + // + prepareOutputValue(outResultInfo, ioFuncInfo); + + // Next, we want to identify all the places where the function + // `return`s a value, since those establish all the possible + // values for the function result. + // + // Specialization will only be possible if all of those results + // return the "same" value, or values that are in some way + // similar enough for us to collapse into a single pattern. + // + // Identifying the return sites is as simple as looking at + // the terminator instructions of all blocks in the function. 
+ // + for( auto block : func->getBlocks() ) + { + auto returnInst = as<IRReturnVal>(block->getTerminator()); + if(!returnInst) + continue; + + auto value = returnInst->getVal(); + + IRBuilder builder(getSharedBuilder()); + builder.setInsertBefore(returnInst); + + // Given the `value` being returned, we need to determine + // if it is usable for specializing call sites to this + // function. + // + // If there is a single `return` site, then we can use + // the value returned there as a representative of the + // value returned. + // + // If there are multiple `return` sites, then any sites + // after the first will check if they are similar enough + // in structure to the first one to allow specialization + // to proceed. + // + // If we either fail to identify a specializable result + // or to match a new `return` value against previous + // ones, then the specialization process will fail. + // + SLANG_RETURN_ON_FAIL(specializeOutputValue(value, outResultInfo, ioFuncInfo)); + + // We will replace the `return <value>;` operation with + // a simple `return;`, because the new specialized function + // will have no return value. + // + builder.emitReturn(); + returnInst->removeAndDeallocate(); + } + + // If we have succeeded in gathering information from all + // the `return` sites, then we can finish up computing + // `outResultInfo` and return successfully. + // + completeOutputValue(outResultInfo, ioFuncInfo); + return SLANG_OK; + } + + void prepareOutputValue(OutputInfo& ioValueInfo, FuncInfo& ioFuncInfo) + { + // This function is called when we have identified that a particular + // value *does* represent an output, but before we have determined + // what value(s) are used for that output. + // + // As such, we set the output into a mode where its value is undefined, + // since that is the approrpiate default to use in the case where + // the function doesn't actually write anything to an output. 
+ // + ioValueInfo.flavor = OutputInfo::Flavor::Undefined; + + // We also know that the output might require zero or more new output + // parameters, and we can set the starting index of those parameters + // based on what (if any) has been generated so far. + // + ioValueInfo.firstNewOutputParamIndex = ioFuncInfo.newOutputParams.getCount(); + } + + void completeOutputValue(OutputInfo& ioValueInfo, FuncInfo& ioFuncInfo) + { + // This function is called when we are done computing the information + // required to specialize a particular output value. + // + // We can now determine how many new output parameters, if any, + // were introduced for the sake of this output. + // + ioValueInfo.newOutputParamCount = ioFuncInfo.newOutputParams.getCount() - ioValueInfo.firstNewOutputParamIndex; + } + + Result specializeOutputValue(IRInst* value, OutputInfo& ioOutputInfo, FuncInfo& ioFuncInfo) + { + // This function is called for each `value` that might be written + // to the output identified by `ioOutputInfo`. + + // If this is the first call for the given output, then + // the `representative` value will not have been set. + // + IRInst* representative = ioOutputInfo.representative; + if( !representative ) + { + // In that case, we will use the given `value` as the + // representative value of this output. + // + representative = value; + ioOutputInfo.flavor = OutputInfo::Flavor::Replace; + ioOutputInfo.representative = representative; + } + + // If this is *not* the first call for the given output, + // then we need to confirm that `value` and `representative` + // are suitably matched so that specialization based on `representative` + // will also suffice for `value`. + // + // At the very least, we expect them to be operations with + // the same opcode.
+ // + if(value->op != representative->op) + return SLANG_FAIL; + + // Furthermore, only certain instructions are amenable to + // specialization, because in general we cannot reproduce + // an instruction outside of its containing function and + // have it mean the same thing. + // + // We will specifically enumerate the case that we support, + // and expand them over time. + // + // Each supported instruction opcode might introduce new + // constraints on how `value` and `representative` must match. + // + switch( value->op ) + { + default: + // Any opcode we do not specifically enable should cause + // specialization to fail. + // + return SLANG_FAIL; + + case kIROp_GlobalParam: + // A direct reference to a global shader parameter is + // the easiest case to handle. + // + // We do need to require that all values used for the + // same output refer to the *same* global parameter. + // + if(value != representative) return SLANG_FAIL; + return SLANG_OK; + + // TODO: There are a number of additional cases that we should + // enable here. + // + // The most obvious new cases to support are: + // + // * Function parameters: if the output value is one of the + // parameter of the function, then callers can just use the + // same value they passed for the corresponding argument. + // + // * Array indexing: if the array itself is suitable to specialize, + // then it should be possible to return the array index via + // a new `out` parameter, and have the caller do the indexing. + } + + // Note: the `FuncInfo` is currently being passed in in aid of the + // array-indexing case, but because that case is not implemented + // the parameter is not being used. 
+ // + SLANG_UNUSED(ioFuncInfo); + + // TODO: One of the hardest cases here would be `inout` parameters + // of texture type, where the result value depends on the input value(s): + // + // void swap(inout Texture2D a, inout Texture2D b) + // { + // Texture2D tmp = a; + // a = b; + // b = tmp; + // } + // + // In such a case, the value written to `a` is a `load` from parameter `b`, + // but it would be difficult to *prove* that such a load represents the + // value of the parameter on input to the function, rather than on output. + // + // It might be best if resource type legalization replaced `inout` + // parameters of resource type with distinct `in` and `out` parameters, + // to make the relationships more clear. + } + + // As discussed earlier, the case for `out`/`inout` function parameters + // is more involved than that for the function `return` value, so we + // put it off until we'd discussed the shared subroutines. + + Result maybeSpecializeParam(IRParam* param, ParamInfo& outParamInfo, FuncInfo& ioFuncInfo) + { + // We only want to specialize in the cse where the parameter + // is an `out` or `inout` (both inherit from `IROutTypeBase`), + // and the pointed-to type is a resource. + // + auto paramType = param->getDataType(); + auto outType = as<IROutTypeBase>(paramType); + if(!outType) + return SLANG_OK; + auto valueType = outType->getValueType(); + if(!isResourceType(valueType)) + return SLANG_OK; + + prepareOutputValue(outParamInfo, ioFuncInfo); + + // We are going to remove the parameter and add zero or more + // replacements, and we want any replacements to end up + // at the same point in the function signature. + // + IRBuilder paramsBuilder(getSharedBuilder()); + paramsBuilder.setInsertBefore(param); + + // We also need to introduce new instructions into the function + // body, as part of the entry block. 
+ // + IRBlock* block = as<IRBlock>(param->getParent()); + IRBuilder bodyBuilder(getSharedBuilder()); + bodyBuilder.setInsertBefore(block->getFirstOrdinaryInst()); + + // No matter what, we create a local variable that will be + // used to replace the parameter. + // + IRVar* newVar = bodyBuilder.emitVar(valueType); + + if( as<IRInOutType>(outType) ) + { + // If the parameter is an `inout` rather than just + // an `out`, then we still need a parameter to + // be passed in, but it can be an `in` parameter + // instead, which means a `T` instead of an + // `InOut<T>`. + // + IRInst* newParam = paramsBuilder.createParam(valueType); + param->transferDecorationsTo(newParam); + + // The start of the function body should assign + // from the `in` parameter to the local variable. + // + bodyBuilder.emitStore(newVar, newParam); + + // We also need call sites to pass in an argument + // for the new `in` parameter, which will have to + // be dereferenced by one level from the original + // argument they were passing. + // + outParamInfo.oldArgMode = ParamInfo::OldArgMode::Deref; + } + else + { + // The case for a pure `out` parameter is easier: + // we don't need to initialize the local variable, + // and we don't need callers to pass in anything. + // + outParamInfo.oldArgMode = ParamInfo::OldArgMode::Ignore; + } + + // Next, we want to identify all the places in the function + // that `store` to the given output parameter. + // + // Note: this logic is subtly depending on the structure + // of how the front-end generates code for `out` and `inout` + // parameters: + // + // * The only `load` of an `inout` parameter is emitted at + // the very start of a function body, to copy it over to + // a temporary variable. 
+ // + // * The only `store`s of an `out` or `inout` parameter are + // right before `return` instructions, to establish the + // final value of that parameter, and every `out`/`inout` + // parameter is stored along every control-flow path + // that reaches a `return`. + // + // Those invariants could easily be eliminated in a few different + // ways. Notably, if we added some more clever memory optimizations, + // then a pass could notice that we have: + // + // let val = load(inoutParam); + // ... + // store(inoutParam, val); + // + // and optimize away the `store` (at least). + // + // For now we can get away with this because we don't do very many + // interesting memory/pointer optimizations in Slang, but it is + // still worrying to have this kind of assumption baked in. + // + // TODO: We should decide on an encoding for the behavior of + // `out`/`inout` parameters that doesn't have as many "gotcha" cases. + // + List<IRStore*> stores; + for( auto use = param->firstUse; use; use = use->nextUse ) + { + auto user = use->getUser(); + auto store = as<IRStore>(user); + if(!store) + continue; + if(store->ptr.get() != param) + continue; + stores.add(store); + } + + // Having identified the places where a value is stored to + // the output parameter, we iterate over those values to + // ensure that they are all specializable and consistent + // with one another. + // + for(auto store : stores) + { + auto value = store->val.get(); + SLANG_RETURN_ON_FAIL(specializeOutputValue(value, outParamInfo, ioFuncInfo)); + + // Given our assumptions about how `store`s to output + // parameters are used, we can eliminate all these `store`s + // since the values they write won't ever be used. 
+ // + store->removeAndDeallocate(); + } + + // It is possible that there will still be used of the parameter + // even after we eliminate all the `store`s from it (e.g., the initial + // `load` that pulls the value from an `inout` parameter), so we + // replace any remaining uses of the parameter with the local + // variable we introduced, before removing the parameter. + // + param->replaceUsesWith(newVar); + param->removeAndDeallocate(); + + completeOutputValue(outParamInfo, ioFuncInfo); + return SLANG_OK; + } + + void specializeCallSite( + IRCall* oldCall, + IRFunc* newFunc, + FuncInfo const& funcInfo) + { + // Given an existing call, we will insert a new call right before + // it and then remove the old one. + // + IRBuilder builder(getSharedBuilder()); + builder.setInsertBefore(oldCall); + + // The new callee may have additional `out` parameters that + // represent things like array indices required by the + // new lookup operations. The new call site will need + // to introduce temporaries to capture the values of + // these outputs. + // + List<IRVar*> newOutputVars; + for( auto const& newOutputParamInfo : funcInfo.newOutputParams ) + { + auto newOutputVar = builder.emitVar(newOutputParamInfo.type); + newOutputVars.add(newOutputVar); + } + + // Next, we need to build up the argument list for + // the call, by looking at the information that + // was recorded for each parameter of the original. + // + List<IRInst*> newArgs; + Index oldParamCounter = 0; + for( auto const& oldParamInfo : funcInfo.oldParams ) + { + // We can grab the argument from the old call + // that was being used for this parameter, but + // we need to check whether or not the new call + // will use it. + // + auto oldParamIndex = oldParamCounter++; + auto oldArg = oldCall->getArg(oldParamIndex); + + // Depending on how the callee specialized this parameter, + // we will pass the argument, or data derived from it, + // or nothing. 
+ // + switch( oldParamInfo.oldArgMode ) + { + default: + SLANG_UNEXPECTED("unhandled case"); + break; + + case ParamInfo::OldArgMode::Keep: + // If the parameter was not specialized away, then + // the argument should be passed as-is. + // + newArgs.add(oldArg); + break; + + case ParamInfo::OldArgMode::Ignore: + // If the parameter was specialized out of existence, + // then we don't pass the argument in at all. + // + break; + + case ParamInfo::OldArgMode::Deref: + // If an `inout` argument has been specialized into an + // `in` argument, then we need to dereference the pointer + // that was being passed in before, and pass in the value + // it points to instead. + // + // Note: the expectation is that once the call site has + // been specialized, subsequent optimization will eliminate + // this `load`, and replace it with whatever value was + // being stored for the `inout` argument before the call. + // + newArgs.add(builder.emitLoad(oldArg)); + break; + } + + // A resource parameter that got specialized might also introduce new + // `out` parameters that help the caller compute the right result + // value (e.g., array indices). Those parameters will come right + // in the parameter list right after the location of the original + // parameter. + // + for( Index i = 0; i < oldParamInfo.newOutputParamCount; ++i ) + { + newArgs.add(newOutputVars[oldParamInfo.firstNewOutputParamIndex + i]); + } + } + + // The function return value can also require new `out` parameters as + // part of specialization; any parameters it introduces will go + // over all the others. + // + for( Index i = 0; i < funcInfo.result.newOutputParamCount; ++i ) + { + newArgs.add(newOutputVars[funcInfo.result.firstNewOutputParamIndex + i]); + } + + // Once we've built up the argument list for the new call we can emit + // it, and also transfer any helpful decorations from the old call + // over to the new one. 
+ // + auto newResultType = newFunc->getResultType(); + auto newCall = builder.emitCallInst(newResultType, newFunc, newArgs); + oldCall->transferDecorationsTo(newCall); + + // Just calling the specialized function is not enough, of course, + // because the whole point of this pass was to move the logic + // that computes a result resource from the callee up to the caller. + // + // After the call is completed, any additional `out` arguments will + // have had their values filled in (e.g., the callee will have + // computed the array index to be used, etc.). + // + // We can now iterate over the parameters again, and identify + // the output parameters that have been specialized. + // + oldParamCounter = 0; + for( auto const& oldParamInfo : funcInfo.oldParams ) + { + auto oldParamIndex = oldParamCounter++; + auto oldArg = oldCall->getArg(oldParamIndex); + + // We skip over parameters that were not specialized. + // + if(oldParamInfo.flavor == OutputInfo::Flavor::None) + continue; + + // For any paraemter that was specialized, we will use + // the computed information on the parameter to materialize + // a value for the output in the context of the caller. + // + auto value = materialize(builder, oldParamInfo); + + // For an `out` or `inout` parameter, the `oldArg` represents + // a pointer where the output value should be stored, so + // we can emulate the behavior of the original function + // by storing the value as expected. + // + builder.emitStore(oldArg, value); + } + + // If the function result is an output that needs to be + // specialized, then we need to handle it much like the + // parameter case above. + // + if( funcInfo.result.flavor != OutputInfo::Flavor::None ) + { + // We materialize the expected function result into + // an IR value in the context of the caller, and then + // use that value to replace any uses of the return + // value of the original call. 
+ // + auto value = materialize(builder, funcInfo.result); + oldCall->replaceUsesWith(value); + } + else + { + // If the call was specialized, but the return value + // was not something that needed specialization, then + // we still need to replace any uses of the original + // call to use the value of the new call. + // + oldCall->replaceUsesWith(newCall); + } + + // After we've fully wired up the new call, we eliminate + // the old call site, which will have no more uses. + // + // Note: At this point, the body of the caller function + // is likely to have opportunities for further optimization. + // Simple dataflow optimizations should now be able to + // resolve the identities of resources that had previously + // only been visible as the value of local variables or + // the results of `call` instructions. + // + oldCall->removeAndDeallocate(); + } + + // In order to specialize call sites to functions that output + // resources, we need a way to materialize the value for an + // output in the context of the caller, based on the information + // that was gathered in the callee. + + IRInst* materialize(IRBuilder& builder, OutputInfo const& info) + { + // For now, we are only handling a small fraction of the + // possible cases. + + SLANG_UNUSED(builder); + + // The basic idea is to look at the `representative` instruction + // that stands in for the output value (which is an instruction + // from the body of the callee), and to produce an equivalent + // value in the context of the caller. + // + auto representative = info.representative; + switch( representative->op ) + { + default: + // Because we only allow certain instructions when specializing + // the callee, any instruction outside of the allowed ones + // would represent an internal error. 
+ // + SLANG_UNEXPECTED("unhandled case"); + UNREACHABLE_RETURN(nullptr); + + case kIROp_GlobalParam: + // If the value in the callee was a reference to a global parameter, + // then we can simply refer to the same parameter here in the caller. + // + return representative; + + // TODO: As other cases are added to `specializeOutputValue()`, we will + // need to add corresponding cases here. + } + } + + // TODO: A really important missing step here is that we need AST-level rules + // that express the constraints on how resource-bearing types can and + // cannot be used for local variables, `out` parameters, etc. + + // TODO: We should add another pass that takes any global variables + // of resource type and transforms them into `in`/`out`/`inout` parameters + // in any function that accesses them (and proceeds transitively up + // the call stack), with a special rule that the globals translate into + // local variables in each entry point function that needs them. + // + // Such a pass would reduce the problem of supporting global variables + // with resource types to that of supporting locals and return values of + // resource type. + // + // Note: that same pass could just apply to *all* globals for targets where + // HLSL-style thread-local globals aren't supported. The main challenge that + // would need to be worked out there is interaction with separate compilation, + // but transforming them so that the function signatures are changed makes + // the challenge more explicit and thus perhaps easier to tackle. + + // TODO: We probably need to update the two passes in this file so that they + // work in an iterative fashion (combined with some SSA "cleanup" on function + // bodies), because each optimization may open up opportunities for the other + // to apply. 
+}; + +void specializeResourceOutputs( + BackEndCompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module) +{ + if(isD3DTarget(targetRequest) || isKhronosTarget(targetRequest)) + {} + else + { + // Don't bother applying this pass on targets that won't + // benefit from it. + // + // TODO: it would be good if we could express this kind + // of conditional in a way that doesn't involve explicitly + // enumerating matching targets. + // + return; + } + + ResourceOutputSpecializationPass pass; + pass.compileRequest = compileRequest; + pass.targetRequest = targetRequest; + pass.module = module; + pass.processModule(); +} + +} // namespace Slang diff --git a/source/slang/slang-ir-specialize-resources.h b/source/slang/slang-ir-specialize-resources.h index 1a5e0f7d8..62a2728bc 100644 --- a/source/slang/slang-ir-specialize-resources.h +++ b/source/slang/slang-ir-specialize-resources.h @@ -21,4 +21,9 @@ namespace Slang BackEndCompileRequest* compileRequest, TargetRequest* targetRequest, IRModule* module); + + void specializeResourceOutputs( + BackEndCompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module); } diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 37841c35f..bc7f7970b 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -678,7 +678,7 @@ namespace Slang block->insertAtEnd(this); } - void fixUpFuncType(IRFunc* func) + void fixUpFuncType(IRFunc* func, IRType* resultType) { SLANG_ASSERT(func); @@ -699,12 +699,15 @@ namespace Slang paramTypes.add(param->getFullType()); } - auto resultType = func->getResultType(); - auto funcType = builder.getFuncType(paramTypes, resultType); builder.setDataType(func, funcType); } + void fixUpFuncType(IRFunc* func) + { + fixUpFuncType(func, func->getResultType()); + } + // bool isTerminatorInst(IROp op) diff --git a/source/slang/slang-ir.h b/source/slang/slang-ir.h index 9a0c71cb1..03d29280a 100644 --- a/source/slang/slang-ir.h +++ 
b/source/slang/slang-ir.h @@ -1353,6 +1353,18 @@ struct IRFunc : IRGlobalValueWithParams }; /// Adjust the type of an IR function based on its parameter list. + /// + /// The function type formed will use the types of the actual + /// parameters in the body of `func`, as well as the given `resultType`. + /// +void fixUpFuncType(IRFunc* func, IRType* resultType); + + /// Adjust the type of an IR function based on its parameter list. + /// + /// The function type formed will use the types of the actual + /// parameters in the body of `func`, as well as the result type + /// that is found on the current type of `func`. + /// void fixUpFuncType(IRFunc* func); // A generic is akin to a function, but is conceptually executed |
