diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-emit.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 5 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-arrays.cpp | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-buffer-load-arg.cpp | 96 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-buffer-load-arg.h | 44 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-function-call.cpp | 162 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-function-call.h | 5 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-resources.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 150 |
9 files changed, 319 insertions, 152 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 12dd80135..4a0018770 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -25,6 +25,7 @@ #include "slang-ir-restructure-scoping.h" #include "slang-ir-specialize.h" #include "slang-ir-specialize-arrays.h" +#include "slang-ir-specialize-buffer-load-arg.h" #include "slang-ir-specialize-resources.h" #include "slang-ir-ssa.h" #include "slang-ir-strip-witness-tables.h" @@ -451,6 +452,7 @@ Result linkAndOptimizeIR( // pass down the target request along with the IR. // specializeResourceOutputs(compileRequest, targetRequest, irModule); + specializeFuncsForBufferLoadArgs(compileRequest, targetRequest, irModule); specializeResourceParameters(compileRequest, targetRequest, irModule); // For GLSL targets, we also want to specialize calls to functions that diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 904b9955e..267866b1b 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -1310,6 +1310,8 @@ struct IRLoad : IRInst { IRUse ptr; IR_LEAF_ISA(Load) + + IRInst* getPtr() { return ptr.get(); } }; struct IRStore : IRInst @@ -1317,6 +1319,9 @@ struct IRStore : IRInst IRUse ptr; IRUse val; IR_LEAF_ISA(Store) + + IRInst* getPtr() { return ptr.get(); } + IRInst* getVal() { return val.get(); } }; struct IRFieldExtract : IRInst diff --git a/source/slang/slang-ir-specialize-arrays.cpp b/source/slang/slang-ir-specialize-arrays.cpp index 53e317b81..2ed3da479 100644 --- a/source/slang/slang-ir-specialize-arrays.cpp +++ b/source/slang/slang-ir-specialize-arrays.cpp @@ -35,8 +35,9 @@ struct ArrayParameterSpecializationCondition : FunctionCallSpecializeCondition return false; } - bool doesParamNeedSpecialization(IRParam* param) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg) { + SLANG_UNUSED(arg); return isStructTypeWithArray(param->getDataType()); } }; diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp new file mode 100644 index 000000000..353c6a104 --- /dev/null +++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp @@ -0,0 +1,96 @@ +// slang-ir-specialize-buffer-load-arg.cpp +#include "slang-ir-specialize-buffer-load-arg.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-specialize-function-call.h" + +namespace Slang +{ + +// This file implements a pass that translates function call sites where +// the result of a buffer load from a global shader parameter (e.g., a +// global constant buffer) is being passed through to the callee. It +// replaces those with calls to specialized callee functions that directly +// reference the chosen global. +// +// As swith most of our IR passes, we encapsulate the logic here in a context +// type so that the data that needs to be shared throughout the pass can +// be conveniently scoped. + +struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition +{ + typedef FunctionCallSpecializeCondition Super; + + virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) + { + // We only want to specialize for `struct` types and not base types. + // + // TODO: We might want to consider some criteria here for the "large-ness" + // of a structure (in terms of bytes and/or fields), so that we don't + // eliminate loads of sufficiently small types (which are cheap to pass + // by value). + // + auto paramType = param->getDataType(); + if(!as<IRStructType>(paramType)) + return false; + + // We also only want to specialize for arguments that are a load + // from some kind of global shader parameter. + // + IRInst* a = arg; + if (auto argLoad = as<IRLoad>(arg)) + { + a = argLoad->getPtr(); + } + else + { + return false; + } + + // We want to handle loads from a shader parameter that is an array + // of buffers, and not just a single global buffer. + // + while (auto argGetElement = as<IRGetElement>(a)) + { + a = argGetElement->getBase(); + } + + // The "root" of the parameter must be a reference to a global-scope + // shader parameter, so that we know we can substitute it into the callee. + // + if (auto argGlobalParam = as<IRGlobalParam>(a)) + { + return true; + } + else + { + return false; + } + + // TODO: There are other patterns that we could attempt to optimize here. + // For example, this logic only handles loads of the *entire* contents of + // a buffer, so it would miss: + // + // * A load of a large structure from field in a constant buffer, so that + // the value loaded is not the entire buffer contents. + // + // * A load of a large structure from a structured buffer, or any other kind + // of buffer that requires an index. + // + // * Any resource load that is not expressed at the IR level with a `load` + // instruction (e.g., those that might use an intrinsic function). + // + } +}; + +void specializeFuncsForBufferLoadArgs( + BackEndCompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module) +{ + FuncBufferLoadSpecializationCondition condition; + specializeFunctionCalls(compileRequest, targetRequest, module, &condition); +} + +} diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.h b/source/slang/slang-ir-specialize-buffer-load-arg.h new file mode 100644 index 000000000..9d79a870e --- /dev/null +++ b/source/slang/slang-ir-specialize-buffer-load-arg.h @@ -0,0 +1,44 @@ +// slang-ir-specialize-buffer-load-arg.h +#pragma once + +namespace Slang +{ +class BackEndCompileRequest; +class TargetRequest; +struct IRModule; + + + /// Specialize functions in `module` that are called with direct loads from buffers. + /// + /// For example: + /// + /// struct Params { /* many fields */ } + /// int helper(Params p, int x) { return p.justOneField + x; } + /// ... + /// ConstantBuffer<Params> gParams; + /// ... + /// int z = helper(gParams, y); + /// + /// In this case, the function `helper` declares a very large structure type as + /// a by-value argument. Depending on the final code-generation target, this could + /// result in output code that loads the entire contents of `gParams` before passing + /// it to `helper`, which then uses only a single field (rendering the rest of the load + /// operations wasted). + /// + /// This pass is designed to specialize a callee function like `helper` based on call + /// sites in this form, so that the output code is: + /// + /// struct Params { /* as before */ } + /// ConstantBuffer<Params> gParams; + /// int helper_1(int x) { return gParams.justOneField + x; } + /// ... + /// int z = helper_1(y); + /// + /// Note how in the transformed code, there is no longer any attempt to load the rest + /// of the contents of `gParams`. + /// +void specializeFuncsForBufferLoadArgs( + BackEndCompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module); +} diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp index eb574c002..0341438c5 100644 --- a/source/slang/slang-ir-specialize-function-call.cpp +++ b/source/slang/slang-ir-specialize-function-call.cpp @@ -8,6 +8,61 @@ namespace Slang { +bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization(IRParam* param, IRInst* inArg) +{ + SLANG_UNUSED(param); + + // Determining if an argument is suitable for + // specializing a callee function requires + // looking at its (recurisve) structure. + // + // Rather than write a recursively procedure + // here, we will be tail-recursive by using + // a simple loop. + // + IRInst* arg = inArg; + for (;;) + { + // The leaf case we care about is when the + // argument at the call site is a global + // shader parameter, because then we can + // specialize a callee to refer to the same + // global parameter directly. + // + if (as<IRGlobalParam>(arg)) return true; + + // As we will see later, we can also + // specialize a call when the argument + // is the result of indexing into an + // array (`base[index]`) *if* the `base` + // of the indexing operation is also + // suitable for specialization. + // + if (arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load) + { + auto base = arg->getOperand(0); + + // We will "recurse" on the base of + // the indexing operation by continuing + // our loop with the `base` as our new + // argument. + // + arg = base; + continue; + } + + // By default, we will *not* consider an argument + // suitable for specialization. + // + // TODO: There may be other cases that are worth + // handling here. The current code is based on + // observation of what simple shaders do in + // practice. + // + return false; + } +} + struct FunctionParameterSpecializationContext { // This type implements a pass to specialize functions @@ -121,14 +176,15 @@ struct FunctionParameterSpecializationContext // two conditions we care about: // // 1. Should we specialize? This amounts to whether - // `func` has any parameters that need specialization. - // We will call those "specializable" parameters for - // lack of a better name. + // `func` has any parameters that "want" specialization, + // or wheter `call` has any arguments that "want" specialization. + // If either the parameter or argument at a given position + // want specialization, we will call the coresponding parameter + // a "specializable" parameter for lack of a better name. // // 2. Can we specialize? This amounts to whether the - // arguments in `call` that correspond to those - // specializable parameters are "suitable" for use - // in specialization. + // parameter of `func` and the corresponding argument to + // `call` are both "suitable" for specialization. // // We are going to answer both of these queries in // a single loop that walks over the parameters of @@ -147,23 +203,23 @@ struct FunctionParameterSpecializationContext SLANG_ASSERT(argIndex < call->getArgCount()); auto arg = call->getArg(argIndex); - // If the given parameter doesn't need specialization, + // If neither the parameter nor the argument wants specialization, // then we need to keep looking. // - if(!doesParamNeedSpecialization(param)) + if(!doesParamWantSpecialization(param, arg)) continue; - // If we have run into a `param` that needs specialization, + // If we have run into a `param` or `arg` that wants specialization, // then our first condition is met. // anySpecializableParam = true; - // Now we need to check whether `arg` is actually suitable + // Now we need to check whether `param` and `arg` are actually suitable // for specialization (our second condition). If not, we // can bail out immediately because our second condition // cannot be met. // - if(!isArgSuitableForSpecialization(arg)) + if(!isParamSuitableForSpecialization(param, arg)) return false; } @@ -178,62 +234,14 @@ struct FunctionParameterSpecializationContext // Of course, now we need to back-fill the predicates that // the above function used to evaluate prameters and arguments. - bool doesParamNeedSpecialization(IRParam* param) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg) { - return condition->doesParamNeedSpecialization(param); + return condition->doesParamWantSpecialization(param, arg); } - bool isArgSuitableForSpecialization(IRInst* inArg) + bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg) { - // Determining if an argument is suitable for - // specializing a callee function requires - // looking at its (recurisve) structure. - // - // Rather than write a recursively procedure - // here, we will be tail-recursive by using - // a simple loop. - // - IRInst* arg = inArg; - for(;;) - { - // The leaf case we care about is when the - // argument at the call site is a global - // shader parameter, because then we can - // specialize a callee to refer to the same - // global parameter directly. - // - if(as<IRGlobalParam>(arg)) return true; - - // As we will see later, we can also - // specialize a call when the argument - // is the result of indexing into an - // array (`base[index]`) *if* the `base` - // of the indexing operation is also - // suitable for specialization. - // - if( arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load ) - { - auto base = arg->getOperand(0); - - // We will "recurse" on the base of - // the indexing operation by continuing - // our loop with the `base` as our new - // argument. - // - arg = base; - continue; - } - - // By default, we will *not* consider an argument - // suitable for specialization. - // - // TODO: There may be other cases that are worth - // handling here. The current code is based on - // observation of what simple shaders do in - // practice. - // - return false; - } + return condition->isParamSuitableForSpecialization(param, arg); } // Once we'e determined that a given call site can/should @@ -451,10 +459,10 @@ struct FunctionParameterSpecializationContext IRParam* oldParam, IRInst* oldArg) { - // We know that the case where a parameter - // doesn't need specialization is easy. + // We know that the case where the parameter + // and argument don't want specialization is easy. // - if( !doesParamNeedSpecialization(oldParam) ) + if( !doesParamWantSpecialization(oldParam, oldArg) ) { // The new call site will use the same argument // value as the old one, and we don't need @@ -470,6 +478,12 @@ struct FunctionParameterSpecializationContext // is handled with a different function // because it needs to recurse in some cases. // + // We will add the parameter that we are specializing to + // the key for caching of specializations, because functions + // specialized at different parameter positions should not + // be shared. + // + ioInfo.key.vals.add(oldParam); getCallInfoForArg(ioInfo, oldArg); } } @@ -572,7 +586,7 @@ struct FunctionParameterSpecializationContext // As always, the easy case is when the parameter of // the original function doesn't need specialization. // - if( !doesParamNeedSpecialization(oldParam) ) + if( !doesParamWantSpecialization(oldParam, oldArg) ) { // The specialized callee will need a new parameter // that fills the same role as the old one, so we @@ -677,9 +691,19 @@ struct FunctionParameterSpecializationContext return newVal; } - else if (oldArg->getOp() == kIROp_Load) + else if (auto oldArgLoad = as<IRLoad>(oldArg)) { - return getSpecializedValueForArg(ioInfo, oldArg->getOperand(0)); + auto oldPtr = oldArgLoad->getPtr(); + auto newPtr = getSpecializedValueForArg(ioInfo, oldPtr); + + auto builder = getBuilder(); + builder->setInsertInto(nullptr); + auto newVal = builder->emitLoad( + oldArg->getFullType(), + newPtr); + ioInfo.newBodyInsts.add(newVal); + + return newVal; } else { diff --git a/source/slang/slang-ir-specialize-function-call.h b/source/slang/slang-ir-specialize-function-call.h index 90c463374..868f9def2 100644 --- a/source/slang/slang-ir-specialize-function-call.h +++ b/source/slang/slang-ir-specialize-function-call.h @@ -5,13 +5,16 @@ namespace Slang { class BackEndCompileRequest; class TargetRequest; + struct IRInst; struct IRModule; struct IRParam; class FunctionCallSpecializeCondition { public: - virtual bool doesParamNeedSpecialization(IRParam* param) = 0; + virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) = 0; + + virtual bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg); }; diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp index c7398fe23..00357ca50 100644 --- a/source/slang/slang-ir-specialize-resources.cpp +++ b/source/slang/slang-ir-specialize-resources.cpp @@ -18,8 +18,10 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio TargetRequest* targetRequest = nullptr; - bool doesParamNeedSpecialization(IRParam* param) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg) { + SLANG_UNUSED(arg); + // Whether or not a parameter needs specialization is really // a function of its type: // diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index d2d15735c..c7e32072e 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -2014,6 +2014,40 @@ LoweredValInfo createVar( return LoweredValInfo::ptr(irAlloc); } +// When we try to turn a `LoweredValInfo` into an address of some temporary storage, +// we can either do it "aggressively" or not (what we'll call the "default" behavior, +// although it isn't strictly more common). +// +// The case that this is mostly there to address is when somebody writes an operation +// like: +// +// foo[a] = b; +// +// In that case, we might as well just use the `set` accessor if there is one, rather +// than complicate things. However, in more complex cases like: +// +// foo[a].x = b; +// +// there is no way to satisfy the semantics of the code the user wrote (in terms of +// only writing one vector component, and not a full vector) by using the `set` +// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into +// an address. +// +// TODO: realistically IR lowering is too early to be binding to this choice, +// because different accessors might be supported on different targets. +// +enum class TryGetAddressMode +{ + Default, + Aggressive, +}; + +/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address. +LoweredValInfo tryGetAddress( + IRGenContext* context, + LoweredValInfo const& inVal, + TryGetAddressMode mode); + /// Add a single `in` argument value to a list of arguments void addInArg( IRGenContext* context, @@ -2092,59 +2126,49 @@ void addArg( // According to our "calling convention" we need to // pass a pointer into the callee. // - // A naive approach would be to just take the address - // of `loweredArg` above and pass it in, but that - // has two issues: - // - // 1. The l-value might not be something that has a single - // well-defined "address" (e.g., `foo.xzy`). - // - // 2. The l-value argument might actually alias some other - // storage that the callee will access (e.g., we are - // passing in a global variable, or two `out` parameters - // are being passed the same location in an array). - // - // In each of these cases, the safe option is to create - // a temporary variable to use for argument-passing, - // and then do copy-in/copy-out around the call. + // Ideally we would like to just pass the address of + // `loweredArg`, and when that it possible we will do so. + // It may happen, though, that `loweredArg` is not an + // addressable l-value (e.g., it is `foo.xyz`, so that + // the bytes of the l-value are not contiguous). // - // TODO: We should consider ruling out case (2) as undefined - // behavior, and specify that whether `inout` and `out` are - // handled via copy-in-copy-out or by-reference parameter - // passing is an implementation detail. That would allow - // us to avoid introducing a copy except where it is required - // for the semantics of (1). - // - // TODO: We should confirm whether such a change will make - // it harder to create SSA values for variables that get - // used with `out` or `inout` parameters. - - LoweredValInfo tempVar = createVar(context, paramType); - - // If the parameter is `in out` or `inout`, then we need - // to ensure that we pass in the original value stored - // in the argument, which we accomplish by assigning - // from the l-value to our temp. - if(paramDirection == kParameterDirection_InOut) + LoweredValInfo argPtr = tryGetAddress(context, argVal, TryGetAddressMode::Default); + if(argPtr.flavor == LoweredValInfo::Flavor::Ptr) { - assign(context, tempVar, argVal); + addInArg(context, ioArgs, LoweredValInfo::simple(argPtr.val)); } + else + { + // If the value is not one that could yield a simple l-value + // then we need to convert it into a temporary + // + LoweredValInfo tempVar = createVar(context, paramType); - // Now we can pass the address of the temporary variable - // to the callee as the actual argument for the `in out` - SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr); - IRInst* tempPtr = getAddress(context, tempVar, loc); - addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr)); + // If the parameter is `in out` or `inout`, then we need + // to ensure that we pass in the original value stored + // in the argument, which we accomplish by assigning + // from the l-value to our temp. + // + if (paramDirection == kParameterDirection_InOut) + { + assign(context, tempVar, argVal); + } - // Finally, after the call we will need - // to copy in the other direction: from our - // temp back to the original l-value. - OutArgumentFixup fixup; - fixup.src = tempVar; - fixup.dst = argVal; + // Now we can pass the address of the temporary variable + // to the callee as the actual argument for the `in out` + SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr); + IRInst* tempPtr = getAddress(context, tempVar, loc); + addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr)); - (*ioFixups).add(fixup); + // Finally, after the call we will need + // to copy in the other direction: from our + // temp back to the original l-value. + OutArgumentFixup fixup; + fixup.src = tempVar; + fixup.dst = argVal; + (*ioFixups).add(fixup); + } } break; @@ -2196,40 +2220,6 @@ void addCallArgsForParam( // -// When we try to turn a `LoweredValInfo` into an address of some temporary storage, -// we can either do it "aggressively" or not (what we'll call the "default" behavior, -// although it isn't strictly more common). -// -// The case that this is mostly there to address is when somebody writes an operation -// like: -// -// foo[a] = b; -// -// In that case, we might as well just use the `set` accessor if there is one, rather -// than complicate things. However, in more complex cases like: -// -// foo[a].x = b; -// -// there is no way to satisfy the semantics of the code the user wrote (in terms of -// only writing one vector component, and not a full vector) by using the `set` -// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into -// an address. -// -// TODO: realistically IR lowering is too early to be binding to this choice, -// because different accessors might be supported on different targets. -// -enum class TryGetAddressMode -{ - Default, - Aggressive, -}; - -/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address. -LoweredValInfo tryGetAddress( - IRGenContext* context, - LoweredValInfo const& inVal, - TryGetAddressMode mode); - /// Compute the direction for a parameter based on its declaration ParameterDirection getParameterDirection(VarDeclBase* paramDecl) { |
