summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/slang-emit.cpp2
-rw-r--r--source/slang/slang-ir-insts.h5
-rw-r--r--source/slang/slang-ir-specialize-arrays.cpp3
-rw-r--r--source/slang/slang-ir-specialize-buffer-load-arg.cpp96
-rw-r--r--source/slang/slang-ir-specialize-buffer-load-arg.h44
-rw-r--r--source/slang/slang-ir-specialize-function-call.cpp162
-rw-r--r--source/slang/slang-ir-specialize-function-call.h5
-rw-r--r--source/slang/slang-ir-specialize-resources.cpp4
-rw-r--r--source/slang/slang-lower-to-ir.cpp150
9 files changed, 319 insertions, 152 deletions
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 12dd80135..4a0018770 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -25,6 +25,7 @@
#include "slang-ir-restructure-scoping.h"
#include "slang-ir-specialize.h"
#include "slang-ir-specialize-arrays.h"
+#include "slang-ir-specialize-buffer-load-arg.h"
#include "slang-ir-specialize-resources.h"
#include "slang-ir-ssa.h"
#include "slang-ir-strip-witness-tables.h"
@@ -451,6 +452,7 @@ Result linkAndOptimizeIR(
// pass down the target request along with the IR.
//
specializeResourceOutputs(compileRequest, targetRequest, irModule);
+ specializeFuncsForBufferLoadArgs(compileRequest, targetRequest, irModule);
specializeResourceParameters(compileRequest, targetRequest, irModule);
// For GLSL targets, we also want to specialize calls to functions that
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index 904b9955e..267866b1b 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -1310,6 +1310,8 @@ struct IRLoad : IRInst
{
IRUse ptr;
IR_LEAF_ISA(Load)
+
+ IRInst* getPtr() { return ptr.get(); }
};
struct IRStore : IRInst
@@ -1317,6 +1319,9 @@ struct IRStore : IRInst
IRUse ptr;
IRUse val;
IR_LEAF_ISA(Store)
+
+ IRInst* getPtr() { return ptr.get(); }
+ IRInst* getVal() { return val.get(); }
};
struct IRFieldExtract : IRInst
diff --git a/source/slang/slang-ir-specialize-arrays.cpp b/source/slang/slang-ir-specialize-arrays.cpp
index 53e317b81..2ed3da479 100644
--- a/source/slang/slang-ir-specialize-arrays.cpp
+++ b/source/slang/slang-ir-specialize-arrays.cpp
@@ -35,8 +35,9 @@ struct ArrayParameterSpecializationCondition : FunctionCallSpecializeCondition
return false;
}
- bool doesParamNeedSpecialization(IRParam* param)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
{
+ SLANG_UNUSED(arg);
return isStructTypeWithArray(param->getDataType());
}
};
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
new file mode 100644
index 000000000..353c6a104
--- /dev/null
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
@@ -0,0 +1,96 @@
+// slang-ir-specialize-buffer-load-arg.cpp
+#include "slang-ir-specialize-buffer-load-arg.h"
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-specialize-function-call.h"
+
+namespace Slang
+{
+
+// This file implements a pass that translates function call sites where
+// the result of a buffer load from a global shader parameter (e.g., a
+// global constant buffer) is being passed through to the callee. It
+// replaces those with calls to specialized callee functions that directly
+// reference the chosen global.
+//
+// As swith most of our IR passes, we encapsulate the logic here in a context
+// type so that the data that needs to be shared throughout the pass can
+// be conveniently scoped.
+
+struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition
+{
+ typedef FunctionCallSpecializeCondition Super;
+
+ virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ {
+ // We only want to specialize for `struct` types and not base types.
+ //
+ // TODO: We might want to consider some criteria here for the "large-ness"
+ // of a structure (in terms of bytes and/or fields), so that we don't
+ // eliminate loads of sufficiently small types (which are cheap to pass
+ // by value).
+ //
+ auto paramType = param->getDataType();
+ if(!as<IRStructType>(paramType))
+ return false;
+
+ // We also only want to specialize for arguments that are a load
+ // from some kind of global shader parameter.
+ //
+ IRInst* a = arg;
+ if (auto argLoad = as<IRLoad>(arg))
+ {
+ a = argLoad->getPtr();
+ }
+ else
+ {
+ return false;
+ }
+
+ // We want to handle loads from a shader parameter that is an array
+ // of buffers, and not just a single global buffer.
+ //
+ while (auto argGetElement = as<IRGetElement>(a))
+ {
+ a = argGetElement->getBase();
+ }
+
+ // The "root" of the parameter must be a reference to a global-scope
+ // shader parameter, so that we know we can substitute it into the callee.
+ //
+ if (auto argGlobalParam = as<IRGlobalParam>(a))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+ // TODO: There are other patterns that we could attempt to optimize here.
+ // For example, this logic only handles loads of the *entire* contents of
+ // a buffer, so it would miss:
+ //
+ // * A load of a large structure from field in a constant buffer, so that
+ // the value loaded is not the entire buffer contents.
+ //
+ // * A load of a large structure from a structured buffer, or any other kind
+ // of buffer that requires an index.
+ //
+ // * Any resource load that is not expressed at the IR level with a `load`
+ // instruction (e.g., those that might use an intrinsic function).
+ //
+ }
+};
+
+void specializeFuncsForBufferLoadArgs(
+ BackEndCompileRequest* compileRequest,
+ TargetRequest* targetRequest,
+ IRModule* module)
+{
+ FuncBufferLoadSpecializationCondition condition;
+ specializeFunctionCalls(compileRequest, targetRequest, module, &condition);
+}
+
+}
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.h b/source/slang/slang-ir-specialize-buffer-load-arg.h
new file mode 100644
index 000000000..9d79a870e
--- /dev/null
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.h
@@ -0,0 +1,44 @@
+// slang-ir-specialize-buffer-load-arg.h
+#pragma once
+
+namespace Slang
+{
+class BackEndCompileRequest;
+class TargetRequest;
+struct IRModule;
+
+
+ /// Specialize functions in `module` that are called with direct loads from buffers.
+ ///
+ /// For example:
+ ///
+ /// struct Params { /* many fields */ }
+ /// int helper(Params p, int x) { return p.justOneField + x; }
+ /// ...
+ /// ConstantBuffer<Params> gParams;
+ /// ...
+ /// int z = helper(gParams, y);
+ ///
+ /// In this case, the function `helper` declares a very large structure type as
+ /// a by-value argument. Depending on the final code-generation target, this could
+ /// result in output code that loads the entire contents of `gParams` before passing
+ /// it to `helper`, which then uses only a single field (rendering the rest of the load
+ /// operations wasted).
+ ///
+ /// This pass is designed to specialize a callee function like `helper` based on call
+ /// sites in this form, so that the output code is:
+ ///
+ /// struct Params { /* as before */ }
+ /// ConstantBuffer<Params> gParams;
+ /// int helper_1(int x) { return gParams.justOneField + x; }
+ /// ...
+ /// int z = helper_1(y);
+ ///
+ /// Note how in the transformed code, there is no longer any attempt to load the rest
+ /// of the contents of `gParams`.
+ ///
+void specializeFuncsForBufferLoadArgs(
+ BackEndCompileRequest* compileRequest,
+ TargetRequest* targetRequest,
+ IRModule* module);
+}
diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp
index eb574c002..0341438c5 100644
--- a/source/slang/slang-ir-specialize-function-call.cpp
+++ b/source/slang/slang-ir-specialize-function-call.cpp
@@ -8,6 +8,61 @@
namespace Slang
{
+bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization(IRParam* param, IRInst* inArg)
+{
+ SLANG_UNUSED(param);
+
+ // Determining if an argument is suitable for
+ // specializing a callee function requires
+ // looking at its (recurisve) structure.
+ //
+ // Rather than write a recursively procedure
+ // here, we will be tail-recursive by using
+ // a simple loop.
+ //
+ IRInst* arg = inArg;
+ for (;;)
+ {
+ // The leaf case we care about is when the
+ // argument at the call site is a global
+ // shader parameter, because then we can
+ // specialize a callee to refer to the same
+ // global parameter directly.
+ //
+ if (as<IRGlobalParam>(arg)) return true;
+
+ // As we will see later, we can also
+ // specialize a call when the argument
+ // is the result of indexing into an
+ // array (`base[index]`) *if* the `base`
+ // of the indexing operation is also
+ // suitable for specialization.
+ //
+ if (arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load)
+ {
+ auto base = arg->getOperand(0);
+
+ // We will "recurse" on the base of
+ // the indexing operation by continuing
+ // our loop with the `base` as our new
+ // argument.
+ //
+ arg = base;
+ continue;
+ }
+
+ // By default, we will *not* consider an argument
+ // suitable for specialization.
+ //
+ // TODO: There may be other cases that are worth
+ // handling here. The current code is based on
+ // observation of what simple shaders do in
+ // practice.
+ //
+ return false;
+ }
+}
+
struct FunctionParameterSpecializationContext
{
// This type implements a pass to specialize functions
@@ -121,14 +176,15 @@ struct FunctionParameterSpecializationContext
// two conditions we care about:
//
// 1. Should we specialize? This amounts to whether
- // `func` has any parameters that need specialization.
- // We will call those "specializable" parameters for
- // lack of a better name.
+ // `func` has any parameters that "want" specialization,
+ // or wheter `call` has any arguments that "want" specialization.
+ // If either the parameter or argument at a given position
+ // want specialization, we will call the coresponding parameter
+ // a "specializable" parameter for lack of a better name.
//
// 2. Can we specialize? This amounts to whether the
- // arguments in `call` that correspond to those
- // specializable parameters are "suitable" for use
- // in specialization.
+ // parameter of `func` and the corresponding argument to
+ // `call` are both "suitable" for specialization.
//
// We are going to answer both of these queries in
// a single loop that walks over the parameters of
@@ -147,23 +203,23 @@ struct FunctionParameterSpecializationContext
SLANG_ASSERT(argIndex < call->getArgCount());
auto arg = call->getArg(argIndex);
- // If the given parameter doesn't need specialization,
+ // If neither the parameter nor the argument wants specialization,
// then we need to keep looking.
//
- if(!doesParamNeedSpecialization(param))
+ if(!doesParamWantSpecialization(param, arg))
continue;
- // If we have run into a `param` that needs specialization,
+ // If we have run into a `param` or `arg` that wants specialization,
// then our first condition is met.
//
anySpecializableParam = true;
- // Now we need to check whether `arg` is actually suitable
+ // Now we need to check whether `param` and `arg` are actually suitable
// for specialization (our second condition). If not, we
// can bail out immediately because our second condition
// cannot be met.
//
- if(!isArgSuitableForSpecialization(arg))
+ if(!isParamSuitableForSpecialization(param, arg))
return false;
}
@@ -178,62 +234,14 @@ struct FunctionParameterSpecializationContext
// Of course, now we need to back-fill the predicates that
// the above function used to evaluate prameters and arguments.
- bool doesParamNeedSpecialization(IRParam* param)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
{
- return condition->doesParamNeedSpecialization(param);
+ return condition->doesParamWantSpecialization(param, arg);
}
- bool isArgSuitableForSpecialization(IRInst* inArg)
+ bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg)
{
- // Determining if an argument is suitable for
- // specializing a callee function requires
- // looking at its (recurisve) structure.
- //
- // Rather than write a recursively procedure
- // here, we will be tail-recursive by using
- // a simple loop.
- //
- IRInst* arg = inArg;
- for(;;)
- {
- // The leaf case we care about is when the
- // argument at the call site is a global
- // shader parameter, because then we can
- // specialize a callee to refer to the same
- // global parameter directly.
- //
- if(as<IRGlobalParam>(arg)) return true;
-
- // As we will see later, we can also
- // specialize a call when the argument
- // is the result of indexing into an
- // array (`base[index]`) *if* the `base`
- // of the indexing operation is also
- // suitable for specialization.
- //
- if( arg->getOp() == kIROp_getElement || arg->getOp() == kIROp_Load )
- {
- auto base = arg->getOperand(0);
-
- // We will "recurse" on the base of
- // the indexing operation by continuing
- // our loop with the `base` as our new
- // argument.
- //
- arg = base;
- continue;
- }
-
- // By default, we will *not* consider an argument
- // suitable for specialization.
- //
- // TODO: There may be other cases that are worth
- // handling here. The current code is based on
- // observation of what simple shaders do in
- // practice.
- //
- return false;
- }
+ return condition->isParamSuitableForSpecialization(param, arg);
}
// Once we'e determined that a given call site can/should
@@ -451,10 +459,10 @@ struct FunctionParameterSpecializationContext
IRParam* oldParam,
IRInst* oldArg)
{
- // We know that the case where a parameter
- // doesn't need specialization is easy.
+ // We know that the case where the parameter
+ // and argument don't want specialization is easy.
//
- if( !doesParamNeedSpecialization(oldParam) )
+ if( !doesParamWantSpecialization(oldParam, oldArg) )
{
// The new call site will use the same argument
// value as the old one, and we don't need
@@ -470,6 +478,12 @@ struct FunctionParameterSpecializationContext
// is handled with a different function
// because it needs to recurse in some cases.
//
+ // We will add the parameter that we are specializing to
+ // the key for caching of specializations, because functions
+ // specialized at different parameter positions should not
+ // be shared.
+ //
+ ioInfo.key.vals.add(oldParam);
getCallInfoForArg(ioInfo, oldArg);
}
}
@@ -572,7 +586,7 @@ struct FunctionParameterSpecializationContext
// As always, the easy case is when the parameter of
// the original function doesn't need specialization.
//
- if( !doesParamNeedSpecialization(oldParam) )
+ if( !doesParamWantSpecialization(oldParam, oldArg) )
{
// The specialized callee will need a new parameter
// that fills the same role as the old one, so we
@@ -677,9 +691,19 @@ struct FunctionParameterSpecializationContext
return newVal;
}
- else if (oldArg->getOp() == kIROp_Load)
+ else if (auto oldArgLoad = as<IRLoad>(oldArg))
{
- return getSpecializedValueForArg(ioInfo, oldArg->getOperand(0));
+ auto oldPtr = oldArgLoad->getPtr();
+ auto newPtr = getSpecializedValueForArg(ioInfo, oldPtr);
+
+ auto builder = getBuilder();
+ builder->setInsertInto(nullptr);
+ auto newVal = builder->emitLoad(
+ oldArg->getFullType(),
+ newPtr);
+ ioInfo.newBodyInsts.add(newVal);
+
+ return newVal;
}
else
{
diff --git a/source/slang/slang-ir-specialize-function-call.h b/source/slang/slang-ir-specialize-function-call.h
index 90c463374..868f9def2 100644
--- a/source/slang/slang-ir-specialize-function-call.h
+++ b/source/slang/slang-ir-specialize-function-call.h
@@ -5,13 +5,16 @@ namespace Slang
{
class BackEndCompileRequest;
class TargetRequest;
+ struct IRInst;
struct IRModule;
struct IRParam;
class FunctionCallSpecializeCondition
{
public:
- virtual bool doesParamNeedSpecialization(IRParam* param) = 0;
+ virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) = 0;
+
+ virtual bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg);
};
diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp
index c7398fe23..00357ca50 100644
--- a/source/slang/slang-ir-specialize-resources.cpp
+++ b/source/slang/slang-ir-specialize-resources.cpp
@@ -18,8 +18,10 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio
TargetRequest* targetRequest = nullptr;
- bool doesParamNeedSpecialization(IRParam* param)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
{
+ SLANG_UNUSED(arg);
+
// Whether or not a parameter needs specialization is really
// a function of its type:
//
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index d2d15735c..c7e32072e 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -2014,6 +2014,40 @@ LoweredValInfo createVar(
return LoweredValInfo::ptr(irAlloc);
}
+// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
+// we can either do it "aggressively" or not (what we'll call the "default" behavior,
+// although it isn't strictly more common).
+//
+// The case that this is mostly there to address is when somebody writes an operation
+// like:
+//
+// foo[a] = b;
+//
+// In that case, we might as well just use the `set` accessor if there is one, rather
+// than complicate things. However, in more complex cases like:
+//
+// foo[a].x = b;
+//
+// there is no way to satisfy the semantics of the code the user wrote (in terms of
+// only writing one vector component, and not a full vector) by using the `set`
+// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
+// an address.
+//
+// TODO: realistically IR lowering is too early to be binding to this choice,
+// because different accessors might be supported on different targets.
+//
+enum class TryGetAddressMode
+{
+ Default,
+ Aggressive,
+};
+
+/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
+LoweredValInfo tryGetAddress(
+ IRGenContext* context,
+ LoweredValInfo const& inVal,
+ TryGetAddressMode mode);
+
/// Add a single `in` argument value to a list of arguments
void addInArg(
IRGenContext* context,
@@ -2092,59 +2126,49 @@ void addArg(
// According to our "calling convention" we need to
// pass a pointer into the callee.
//
- // A naive approach would be to just take the address
- // of `loweredArg` above and pass it in, but that
- // has two issues:
- //
- // 1. The l-value might not be something that has a single
- // well-defined "address" (e.g., `foo.xzy`).
- //
- // 2. The l-value argument might actually alias some other
- // storage that the callee will access (e.g., we are
- // passing in a global variable, or two `out` parameters
- // are being passed the same location in an array).
- //
- // In each of these cases, the safe option is to create
- // a temporary variable to use for argument-passing,
- // and then do copy-in/copy-out around the call.
+ // Ideally we would like to just pass the address of
+ // `loweredArg`, and when that it possible we will do so.
+ // It may happen, though, that `loweredArg` is not an
+ // addressable l-value (e.g., it is `foo.xyz`, so that
+ // the bytes of the l-value are not contiguous).
//
- // TODO: We should consider ruling out case (2) as undefined
- // behavior, and specify that whether `inout` and `out` are
- // handled via copy-in-copy-out or by-reference parameter
- // passing is an implementation detail. That would allow
- // us to avoid introducing a copy except where it is required
- // for the semantics of (1).
- //
- // TODO: We should confirm whether such a change will make
- // it harder to create SSA values for variables that get
- // used with `out` or `inout` parameters.
-
- LoweredValInfo tempVar = createVar(context, paramType);
-
- // If the parameter is `in out` or `inout`, then we need
- // to ensure that we pass in the original value stored
- // in the argument, which we accomplish by assigning
- // from the l-value to our temp.
- if(paramDirection == kParameterDirection_InOut)
+ LoweredValInfo argPtr = tryGetAddress(context, argVal, TryGetAddressMode::Default);
+ if(argPtr.flavor == LoweredValInfo::Flavor::Ptr)
{
- assign(context, tempVar, argVal);
+ addInArg(context, ioArgs, LoweredValInfo::simple(argPtr.val));
}
+ else
+ {
+ // If the value is not one that could yield a simple l-value
+ // then we need to convert it into a temporary
+ //
+ LoweredValInfo tempVar = createVar(context, paramType);
- // Now we can pass the address of the temporary variable
- // to the callee as the actual argument for the `in out`
- SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr);
- IRInst* tempPtr = getAddress(context, tempVar, loc);
- addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr));
+ // If the parameter is `in out` or `inout`, then we need
+ // to ensure that we pass in the original value stored
+ // in the argument, which we accomplish by assigning
+ // from the l-value to our temp.
+ //
+ if (paramDirection == kParameterDirection_InOut)
+ {
+ assign(context, tempVar, argVal);
+ }
- // Finally, after the call we will need
- // to copy in the other direction: from our
- // temp back to the original l-value.
- OutArgumentFixup fixup;
- fixup.src = tempVar;
- fixup.dst = argVal;
+ // Now we can pass the address of the temporary variable
+ // to the callee as the actual argument for the `in out`
+ SLANG_ASSERT(tempVar.flavor == LoweredValInfo::Flavor::Ptr);
+ IRInst* tempPtr = getAddress(context, tempVar, loc);
+ addInArg(context, ioArgs, LoweredValInfo::simple(tempPtr));
- (*ioFixups).add(fixup);
+ // Finally, after the call we will need
+ // to copy in the other direction: from our
+ // temp back to the original l-value.
+ OutArgumentFixup fixup;
+ fixup.src = tempVar;
+ fixup.dst = argVal;
+ (*ioFixups).add(fixup);
+ }
}
break;
@@ -2196,40 +2220,6 @@ void addCallArgsForParam(
//
-// When we try to turn a `LoweredValInfo` into an address of some temporary storage,
-// we can either do it "aggressively" or not (what we'll call the "default" behavior,
-// although it isn't strictly more common).
-//
-// The case that this is mostly there to address is when somebody writes an operation
-// like:
-//
-// foo[a] = b;
-//
-// In that case, we might as well just use the `set` accessor if there is one, rather
-// than complicate things. However, in more complex cases like:
-//
-// foo[a].x = b;
-//
-// there is no way to satisfy the semantics of the code the user wrote (in terms of
-// only writing one vector component, and not a full vector) by using the `set`
-// accessor, and we need to be "aggressive" in turning the lvalue `foo[a]` into
-// an address.
-//
-// TODO: realistically IR lowering is too early to be binding to this choice,
-// because different accessors might be supported on different targets.
-//
-enum class TryGetAddressMode
-{
- Default,
- Aggressive,
-};
-
-/// Try to coerce `inVal` into a `LoweredValInfo::ptr()` with a simple address.
-LoweredValInfo tryGetAddress(
- IRGenContext* context,
- LoweredValInfo const& inVal,
- TryGetAddressMode mode);
-
/// Compute the direction for a parameter based on its declaration
ParameterDirection getParameterDirection(VarDeclBase* paramDecl)
{