summaryrefslogtreecommitdiffstats
path: root/source/slang/slang-ir-specialize-buffer-load-arg.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang/slang-ir-specialize-buffer-load-arg.cpp')
-rw-r--r--source/slang/slang-ir-specialize-buffer-load-arg.cpp124
1 files changed, 83 insertions, 41 deletions
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
index 905f2e058..a5a3dd2d9 100644
--- a/source/slang/slang-ir-specialize-buffer-load-arg.cpp
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
@@ -1,8 +1,11 @@
// slang-ir-specialize-buffer-load-arg.cpp
#include "slang-ir-specialize-buffer-load-arg.h"
+#include "slang-ir-defer-buffer-load.h"
#include "slang-ir-insts.h"
+#include "slang-ir-layout.h"
#include "slang-ir-specialize-function-call.h"
+#include "slang-ir-util.h"
#include "slang-ir.h"
namespace Slang
@@ -17,76 +20,115 @@ namespace Slang
// As swith most of our IR passes, we encapsulate the logic here in a context
// type so that the data that needs to be shared throughout the pass can
// be conveniently scoped.
+//
+
+// Note that this pass also ensures other more contrived cases are properly
+// handled. For example:
+//
+// * A load of a large structure from field in a constant buffer, so that
+// the value loaded is not the entire buffer contents.
+//
+// * A load of a large structure from a structured buffer, or any other kind
+// of buffer that requires an index.
+//
struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition
{
typedef FunctionCallSpecializeCondition Super;
- virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ CodeGenContext* codegenContext;
+
+ virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst)
{
// We only want to specialize for `struct` types and not base types.
//
- // TODO: We might want to consider some criteria here for the "large-ness"
- // of a structure (in terms of bytes and/or fields), so that we don't
- // eliminate loads of sufficiently small types (which are cheap to pass
- // by value).
- //
- auto paramType = param->getDataType();
- if (!as<IRStructType>(paramType))
+ auto paramType = (IRType*)unwrapAttributedType(param->getDataType());
+ if (!isTypePreferrableToDeferLoad(codegenContext, paramType))
return false;
- // We also only want to specialize for arguments that are a load
- // from some kind of global shader parameter.
+ // We want to handle loads from arbitrary access chains rooting from a shader parameter.
//
IRInst* a = arg;
- if (auto argLoad = as<IRLoad>(arg))
- {
- a = argLoad->getPtr();
- }
- else
+ for (;;)
{
- return false;
- }
+ // A user pointer can be directly passed into the function, so we no
+ // longer need to trace up further.
+ if (isUserPointerType(a->getDataType()))
+ break;
- // We want to handle loads from a shader parameter that is an array
- // of buffers, and not just a single global buffer.
- //
- while (auto argGetElement = as<IRGetElement>(a))
- {
- a = argGetElement->getBase();
+ if (auto argGetElement = as<IRGetElement>(a))
+ {
+ a = argGetElement->getBase();
+ }
+ else if (auto argSbLoad = as<IRStructuredBufferLoad>(a))
+ {
+ a = argSbLoad->getOperand(0);
+ }
+ else if (auto argBbLoad = as<IRByteAddressBufferLoad>(a))
+ {
+ a = argBbLoad->getOperand(0);
+ }
+ else if (auto argFieldExtract = as<IRFieldExtract>(a))
+ {
+ a = argFieldExtract->getBase();
+ }
+ else if (auto argGetElementPtr = as<IRGetElementPtr>(a))
+ {
+ a = argGetElementPtr->getBase();
+ }
+ else if (auto argSBGetElementPtr = as<IRRWStructuredBufferGetElementPtr>(a))
+ {
+ a = argSBGetElementPtr->getBase();
+ }
+ else if (auto argFieldAddr = as<IRFieldAddress>(a))
+ {
+ a = argFieldAddr->getBase();
+ }
+ else if (auto argLoad = as<IRLoad>(a))
+ {
+ a = argLoad->getPtr();
+
+ // We can safely defer a load to the callee if the source dest is immutable.
+ if (isPointerToImmutableLocation(a))
+ continue;
+
+ // Otherwise, we check if there is no other instructions in between the load and the
+ // call that can modify the memory location. If so, we can still safely defer the
+ // load to the callee.
+ if (!isMemoryLocationUnmodifiedBetweenLoadAndUser(
+ codegenContext->getTargetReq(),
+ argLoad,
+ callInst))
+ return false;
+ }
+ else
+ {
+ break;
+ }
}
- // The "root" of the parameter must be a reference to a global-scope
- // shader parameter, so that we know we can substitute it into the callee.
+ // The "root" of the parameter must be one of the following:
+ // 1. A reference to a global-scope shader parameter that can be referenced directly from
+ // the callee.
+ // 2. A user pointer or bindless resource handle that can be passed to the callee as
+ // ordinary argument.
//
if (const auto argGlobalParam = as<IRGlobalParam>(a))
{
return true;
}
- else
+ else if (isUserPointerType(a->getDataType()) || as<IRCastDescriptorHandleToResource>(a))
{
- return false;
+ return true;
}
-
- // TODO: There are other patterns that we could attempt to optimize here.
- // For example, this logic only handles loads of the *entire* contents of
- // a buffer, so it would miss:
- //
- // * A load of a large structure from field in a constant buffer, so that
- // the value loaded is not the entire buffer contents.
- //
- // * A load of a large structure from a structured buffer, or any other kind
- // of buffer that requires an index.
- //
- // * Any resource load that is not expressed at the IR level with a `load`
- // instruction (e.g., those that might use an intrinsic function).
- //
+ return false;
}
};
void specializeFuncsForBufferLoadArgs(CodeGenContext* codegenContext, IRModule* module)
{
FuncBufferLoadSpecializationCondition condition;
+ condition.codegenContext = codegenContext;
specializeFunctionCalls(codegenContext, module, &condition);
}