summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/slang/slang-emit-wgsl.cpp5
-rw-r--r--source/slang/slang-emit.cpp8
-rw-r--r--source/slang/slang-ir-defer-buffer-load.cpp326
-rw-r--r--source/slang/slang-ir-defer-buffer-load.h22
-rw-r--r--source/slang/slang-ir-defunctionalization.cpp2
-rw-r--r--source/slang/slang-ir-glsl-legalize.cpp10
-rw-r--r--source/slang/slang-ir-metal-legalize.cpp2
-rw-r--r--source/slang/slang-ir-specialize-address-space.cpp43
-rw-r--r--source/slang/slang-ir-specialize-arrays.cpp32
-rw-r--r--source/slang/slang-ir-specialize-buffer-load-arg.cpp124
-rw-r--r--source/slang/slang-ir-specialize-function-call.cpp205
-rw-r--r--source/slang/slang-ir-specialize-function-call.h4
-rw-r--r--source/slang/slang-ir-specialize-resources.cpp3
-rw-r--r--source/slang/slang-ir-util.cpp246
-rw-r--r--source/slang/slang-ir-util.h8
15 files changed, 744 insertions, 296 deletions
diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp
index 53c3aa487..b115c723a 100644
--- a/source/slang/slang-emit-wgsl.cpp
+++ b/source/slang/slang-emit-wgsl.cpp
@@ -295,6 +295,11 @@ void WGSLSourceEmitter::emitStructFieldAttributes(
{
SLANG_UNUSED(allowOffsetLayout);
+ // If the struct type is not used for physical storage, then we don't need to
+ // emit any layout attributes.
+ if (!structType->findDecoration<IRPhysicalTypeDecoration>())
+ return;
+
// Tint emits errors unless we explicitly spell out the layout in some cases, so emit
// offset and align attribtues for all fields.
IRSizeAndAlignmentDecoration* const sizeAndAlignmentDecoration =
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index f1cc6090d..09c2efea9 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -1387,16 +1387,10 @@ Result linkAndOptimizeIR(
specializeFuncsForBufferLoadArgs(codeGenContext, irModule);
// Push `structuredBufferLoad` to the end of access chain to avoid loading unnecessary data.
- if (isKhronosTarget(targetRequest) || isMetalTarget(targetRequest) ||
- isWGPUTarget(targetRequest))
- deferBufferLoad(irModule);
+ deferBufferLoad(codeGenContext, irModule);
// We also want to specialize calls to functions that
// takes unsized array parameters if possible.
- // Moreover, for Khronos targets, we also want to specialize calls to functions
- // that takes arrays/structs containing arrays as parameters with the actual
- // global array object to avoid loading big arrays into SSA registers, which seems
- // to cause performance issues.
specializeArrayParameters(codeGenContext, irModule);
#if 0
diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp
index 51c6a161b..ccdfe4538 100644
--- a/source/slang/slang-ir-defer-buffer-load.cpp
+++ b/source/slang/slang-ir-defer-buffer-load.cpp
@@ -3,142 +3,211 @@
#include "slang-ir-clone.h"
#include "slang-ir-dominators.h"
#include "slang-ir-insts.h"
+#include "slang-ir-layout.h"
#include "slang-ir-redundancy-removal.h"
#include "slang-ir-util.h"
#include "slang-ir.h"
namespace Slang
{
-struct DeferBufferLoadContext
-{
- // Map an original SSA value to a pointer that can be used to load the value.
- Dictionary<IRInst*, IRInst*> mapValueToPtr;
- // Map an ptr to its loaded value.
- Dictionary<IRInst*, IRInst*> mapPtrToValue;
+// Generally, we want to specialize arguments that are large in size, or arguments that
+// are arrays or composite type that contains arrays.
+// This is because:
+// 1. Struct types without arrays will eventually be SROA's into registers and then effectively
+// DCE'd, so they usually won't cause performance issues. In fact, front loading structs
+// and reusing the loaded value instead of repetitively loading from constant memory is
+// usually beneficial to performance. However large struct values can be SROA'd into a large
+// number of registers, causing slow downstream compilation. Therefore we should avoid/defer
+// loading them into registers if we can.
+// 2. Arrays usually cannot be SROA'd into individual registers, which usually leads to
+// large register consumption if they ever get loaded, so we want to defer loading array
+// typed values as much as possible.
- IRFunc* currentFunc = nullptr;
+// If the argument data is bigger than this threshold, it is considered a large object
+// and we will try to specialize it even if it doesn't contain arrays.
+static const int kBufferLoadElementSizeSpecializationThreshold = 128;
- // Ensure that for an original SSA value, we have formed a pointer that can be used to load the
- // value.
- IRInst* ensurePtr(IRInst* valueInst)
- {
- IRInst* result = nullptr;
- if (mapValueToPtr.tryGetValue(valueInst, result))
- return result;
+// If the argument data is smaller than this threshold, it is considered a tiny object
+// and we will not consider specializing it, even if it contains arrays.
+static const int kBufferLoadElementSizeSpecializationMinThreshold = 16;
- IRBuilder b(valueInst);
- b.setInsertBefore(valueInst);
-
- switch (valueInst->getOp())
+static bool isCompositeTypeContainingArrays(IRType* type)
+{
+ if (auto structType = as<IRStructType>(type))
+ {
+ for (auto field : structType->getFields())
{
- case kIROp_StructuredBufferLoad:
- case kIROp_StructuredBufferLoadStatus:
- {
- result = b.emitRWStructuredBufferGetElementPtr(
- valueInst->getOperand(0),
- valueInst->getOperand(1));
- break;
- }
- case kIROp_GetElement:
+ if (const auto arrayType = as<IRArrayTypeBase>(field->getFieldType()))
{
- auto ptr = ensurePtr(valueInst->getOperand(0));
- if (!ptr)
- return nullptr;
- result = b.emitElementAddress(ptr, valueInst->getOperand(1));
- break;
+ return true;
}
- case kIROp_FieldExtract:
+ if (auto subStructType = as<IRStructType>(field->getFieldType()))
{
- auto ptr = ensurePtr(valueInst->getOperand(0));
- if (!ptr)
- return nullptr;
- result = b.emitFieldAddress(ptr, valueInst->getOperand(1));
- break;
+ if (isCompositeTypeContainingArrays(subStructType))
+ return true;
}
- case kIROp_Load:
- result = valueInst->getOperand(0);
- break;
- }
- if (result)
- {
- mapValueToPtr[valueInst] = result;
}
- return result;
}
+ else if (as<IRArrayTypeBase>(type))
+ {
+ return true;
+ }
+ return false;
+}
- static bool isImmutableBufferLoad(IRInst* inst)
+bool isTypePreferrableToDeferLoad(CodeGenContext* codeGenContext, IRType* type)
+{
+ // If parameter is a pointer/reference, we should consider specialize it.
+ if (as<IROutTypeBase>(type) || as<IRRefType>(type) || as<IRConstRefType>(type))
+ return true;
+
+ // We only want to defer loading values that are "large enough" that
+ // we expect them to be expensive to pass by value.
+ //
+ IRSizeAndAlignment sizeAlignment = {};
+ if (SLANG_FAILED(getNaturalSizeAndAlignment(
+ codeGenContext->getTargetProgram()->getOptionSet(),
+ type,
+ &sizeAlignment)))
{
- // Note: we cannot defer loads from RWStructuredBuffer because there can be other
- // instructions that modify the buffer.
+ // If type contains fields that we don't know how to compute natural size
+ // for, default to specialize if it contains arrays.
+ return isCompositeTypeContainingArrays(type);
+ }
+
+ // If the argument is very small, don't bother specializing.
+ if (sizeAlignment.size <= kBufferLoadElementSizeSpecializationMinThreshold)
+ return false;
+
+ // If the argument is somewhat small, don't specialize, unless it contains
+ // arrays.
+ if (sizeAlignment.size <= kBufferLoadElementSizeSpecializationThreshold)
+ {
+ // We generally do not specialize for small values, except it contains
+ // arrays that usually present a challenge for the SROA pass to eliminate
+ // unnecessary loads.
+ if (!isCompositeTypeContainingArrays(type))
+ return false;
+ }
+ return true;
+}
+
+// Returns true if memory loaded by `loadInst` is not modified before `userInst` after it is
+// loaded.
+// This method is currently implementing a very conservative analysis that only allows
+// `loadInst` to be in the same block as `userInst`, with basic aliasing analysis for any
+// stores in between. All other cases are conservatively treated as the memory location may be
+// modified.
+bool isMemoryLocationUnmodifiedBetweenLoadAndUser(
+ TargetRequest* target,
+ IRInst* loadInst,
+ IRInst* userInst)
+{
+ auto func = getParentFunc(loadInst);
+ if (!func)
+ return false;
+
+ // For now we only check if loadInst and userInst are in the same block.
+ if (loadInst->getParent() != userInst->getParent())
+ return false;
+
+ for (IRInst* inst = loadInst->getNextInst(); inst; inst = inst->getNextInst())
+ {
+ // We found callInst before hitting any instruction that may modify the memory.
+ if (inst == userInst)
+ return true;
+
+ if (!inst->mightHaveSideEffects())
+ continue;
+
+ // If we see any inst that has side effect, check if it is simple case that we can rule
+ // out the possibility of modifying the memory location.
switch (inst->getOp())
{
- case kIROp_StructuredBufferLoad:
- case kIROp_StructuredBufferLoadStatus:
- return true;
- case kIROp_Load:
+ case kIROp_Store:
{
- auto rootAddr = getRootAddr(inst->getOperand(0));
- return isPointerToImmutableLocation(rootAddr);
+ auto storedDest = inst->getOperand(0);
+ if (canAddressesPotentiallyAlias(target, func, loadInst->getOperand(0), storedDest))
+ return false;
+ continue;
}
default:
+ // For any other case, conservatively assume the memory location may be modified.
return false;
}
}
+ // We didn't found callInst after loadInst within the same basic block.
+ // We conservatively assume the memory location may be modified.
+ // This check can be extended to use the dominator tree to allow
+ // loadInst and userInst to be in different blocks.
+ return false;
+}
- // Ensure that for a pointer value, we have created a load instruction to materialize the value.
- IRInst* materializePointer(IRBuilder& builder, IRInst* loadInst)
+struct DeferBufferLoadContext
+{
+ CodeGenContext* codeGenContext;
+
+
+ void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst)
{
- auto ptr = ensurePtr(loadInst);
- if (!ptr)
- return nullptr;
- IRInst* result = nullptr;
- if (mapPtrToValue.tryGetValue(ptr, result))
- return result;
- IRAlignedAttr* align = nullptr;
- if (auto load = as<IRLoad>(loadInst))
- align = load->findAttr<IRAlignedAttr>();
- if (!as<IRModuleInst>(ptr->getParent()))
+ // Don't defer the load anymore if the type is simple.
+ if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) ||
+ loadInst->findAttr<IRAlignedAttr>())
{
- setInsertAfterOrdinaryInst(&builder, ptr);
- IRType* valueType = tryGetPointedToType(&builder, ptr->getFullType());
- result = builder.emitLoad(valueType, ptr, align);
- mapPtrToValue[ptr] = result;
+ return;
}
- else
+
+ auto rootAddr = getRootAddr(loadInst->getOperand(0));
+ bool isImmutableBufferLoad = isPointerToImmutableLocation(rootAddr);
+
+ // Don't defer the load if there are uses that are not getElement or fieldExtract.
+ // Because in this case we need to use the entire loaded value, and further deferring
+ // the load down any access chain will introduce redundant loads.
+ for (auto use = loadInst->firstUse; use; use = use->nextUse)
{
- setInsertBeforeOrdinaryInst(&builder, loadInst);
- IRType* valueType = tryGetPointedToType(&builder, ptr->getFullType());
- result = builder.emitLoad(valueType, ptr, align);
- // Since we are inserting the load in a local scope, we can't register
- // the mapping to the pointer, since the global pointer needs to be
- // loaded once per function.
+ auto user = use->getUser();
+ switch (user->getOp())
+ {
+ case kIROp_GetElement:
+ case kIROp_FieldExtract:
+ // Can we defer the load to load only the requested element right before
+ // the element extract inst?
+ // If the buffer is immutable, we can always do that.
+ // If it is not, we need to make sure there is no other instructions that can modify
+ // the buffer between the load and the use.
+ //
+ if (isImmutableBufferLoad)
+ continue;
+ if (isMemoryLocationUnmodifiedBetweenLoadAndUser(
+ codeGenContext->getTargetReq(),
+ loadInst,
+ user))
+ continue;
+ return;
+ default:
+ // If we see any other use the laod instruction, we assume the entire loaded value
+ // is needed, and we can't defer the load anymore.
+ return;
+ }
}
- return result;
- }
- static bool isSimpleType(IRInst* type)
- {
- if (auto modType = as<IRRateQualifiedType>(type))
- type = modType->getValueType();
- if (as<IRStructType>(type))
- return false;
- if (as<IRTupleType>(type))
- return false;
- if (as<IRArrayTypeBase>(type))
- return false;
- return true;
- }
+ // If we reach here, it means all uses are getElement or fieldExtract, and
+ // it is safe to defer the load down the access chain.
- void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst)
- {
- // Don't defer the load anymore if the type is simple.
- if (isSimpleType(loadInst->getDataType()) || loadInst->findAttr<IRAlignedAttr>())
+ if (loadInst->getOp() == kIROp_StructuredBufferLoad)
{
- auto materializedVal = materializePointer(builder, loadInst);
- loadInst->transferDecorationsTo(materializedVal);
- loadInst->replaceUsesWith(materializedVal);
- return;
+ // Convert the structuredBufferLoad to a regular load to reuse
+ // the same logic for deferring regular loads.
+ builder.setInsertBefore(loadInst);
+ auto bufferPtr = builder.emitRWStructuredBufferGetElementPtr(
+ loadInst->getOperand(0),
+ loadInst->getOperand(1));
+ auto sbLoad = builder.emitLoad(bufferPtr);
+ loadInst->transferDecorationsTo(sbLoad);
+ loadInst->replaceUsesWith(sbLoad);
+ loadInst->removeAndDeallocate();
+ loadInst = sbLoad;
}
// Otherwise, look for all uses and try to defer the load before actual use of the value.
@@ -148,19 +217,29 @@ struct DeferBufferLoadContext
loadInst,
[&](IRUse* use)
{
- if (needMaterialize)
- return;
-
auto user = use->getUser();
+
switch (user->getOp())
{
case kIROp_GetElement:
case kIROp_FieldExtract:
{
- auto basePtr = ensurePtr(loadInst);
- if (!basePtr)
- return;
- pendingWorkList.add(user);
+ // If we see a getElement or fieldExtract, we defer the load by
+ // replacing the getElement/fieldExtract with a load of the
+ // elementAddr/fieldAddr.
+ builder.setInsertBefore(user);
+ auto basePtr = loadInst->getOperand(0);
+ IRInst* gepArg = user->getOperand(1);
+ auto elementPtr = builder.emitElementAddress(
+ basePtr,
+ makeArrayViewSingle<IRInst*>(gepArg));
+ auto newLoad = builder.emitLoad(elementPtr);
+ user->transferDecorationsTo(newLoad);
+ user->replaceUsesWith(newLoad);
+ user->removeAndDeallocate();
+
+ // Now add the new load to work list to try to defer it further.
+ pendingWorkList.add(newLoad);
}
break;
default:
@@ -169,41 +248,37 @@ struct DeferBufferLoadContext
}
});
- if (needMaterialize)
- {
- auto val = materializePointer(builder, loadInst);
- loadInst->transferDecorationsTo(val);
- loadInst->replaceUsesWith(val);
- loadInst->removeAndDeallocate();
- }
- else
- {
- // Append to worklist in reverse order so we process the uses in natural appearance
- // order.
- for (Index i = pendingWorkList.getCount() - 1; i >= 0; i--)
- workList.add(pendingWorkList[i]);
- }
+ // Append to worklist in reverse order so we process the uses in natural appearance
+ // order.
+ for (Index i = pendingWorkList.getCount() - 1; i >= 0; i--)
+ workList.add(pendingWorkList[i]);
}
void deferBufferLoadInFunc(IRFunc* func)
{
removeRedundancyInFunc(func, false);
- currentFunc = func;
-
List<IRInst*> workList;
+ // Discover all load instructions and add to work list.
+
for (auto block : func->getBlocks())
{
for (auto inst : block->getChildren())
{
- if (isImmutableBufferLoad(inst))
+ switch (inst->getOp())
{
+ case kIROp_Load:
+ case kIROp_StructuredBufferLoad:
+ // Note: We don't handle `kIROp_StructuredBufferLoadStatus` here because
+ // it also writes to the status code out parameter, which we can't defer.
workList.add(inst);
+ break;
}
}
}
+ // Iteratively process the work list until it is empty.
IRBuilder builder(func);
for (Index i = 0; i < workList.getCount(); i++)
{
@@ -227,9 +302,10 @@ struct DeferBufferLoadContext
}
};
-void deferBufferLoad(IRModule* module)
+void deferBufferLoad(CodeGenContext* codeGenContext, IRModule* module)
{
DeferBufferLoadContext context;
+ context.codeGenContext = codeGenContext;
for (auto childInst : module->getGlobalInsts())
{
if (auto code = as<IRGlobalValueWithCode>(childInst))
diff --git a/source/slang/slang-ir-defer-buffer-load.h b/source/slang/slang-ir-defer-buffer-load.h
index b54271883..0f692b39a 100644
--- a/source/slang/slang-ir-defer-buffer-load.h
+++ b/source/slang/slang-ir-defer-buffer-load.h
@@ -4,9 +4,8 @@ namespace Slang
{
/*
-This pass implements a targeted optimization that defers the loading of structured buffer elements
-to the end of the access chain to avoid loading and repacking unnecessary data.
-For example, if we see:
+This pass implements a intra-function optimization that defers the loading of buffer
+elements to the end of the access chain to avoid loading unnecessary data. For example, if we see:
val = StructuredBufferLoad(s, i)
val2 = GetElement(val, j)
val3 = FieldExtract(val2, field_key_0)
@@ -20,7 +19,22 @@ We should rewrite the code into:
*/
struct IRModule;
+struct IRType;
+struct CodeGenContext;
+struct IRInst;
+class TargetRequest;
-void deferBufferLoad(IRModule* module);
+void deferBufferLoad(CodeGenContext* context, IRModule* module);
+
+// Returns true if the type is suitable for defer-load optimization.
+// Generally, we want to defer loading large structs or composites that contain arrays.
+bool isTypePreferrableToDeferLoad(CodeGenContext* context, IRType* type);
+
+// Returns true if memory loaded by `loadInst` may be modified before `userInst` after it is
+// loaded.
+bool isMemoryLocationUnmodifiedBetweenLoadAndUser(
+ TargetRequest* target,
+ IRInst* loadInst,
+ IRInst* userInst);
} // namespace Slang
diff --git a/source/slang/slang-ir-defunctionalization.cpp b/source/slang/slang-ir-defunctionalization.cpp
index af84ec78a..424971f90 100644
--- a/source/slang/slang-ir-defunctionalization.cpp
+++ b/source/slang/slang-ir-defunctionalization.cpp
@@ -12,7 +12,7 @@ struct FunctionParameterSpecializationCondition : FunctionCallSpecializeConditio
{
TargetRequest* targetRequest = nullptr;
- bool doesParamWantSpecialization(IRParam* param, IRInst* /*arg*/)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* /*arg*/, IRCall* /*callInst*/)
{
IRType* type = param->getDataType();
return as<IRFuncType>(type);
diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp
index a79ca2379..d87d96da0 100644
--- a/source/slang/slang-ir-glsl-legalize.cpp
+++ b/source/slang/slang-ir-glsl-legalize.cpp
@@ -2694,7 +2694,10 @@ static void legalizeMeshPayloadInputParam(
pp->replaceUsesWith(g);
struct MeshPayloadInputSpecializationCondition : FunctionCallSpecializeCondition
{
- bool doesParamWantSpecialization(IRParam*, IRInst* arg) { return arg == g; }
+ bool doesParamWantSpecialization(IRParam*, IRInst* arg, IRCall* /*call*/)
+ {
+ return arg == g;
+ }
IRInst* g;
} condition;
condition.g = g;
@@ -2794,7 +2797,10 @@ static void legalizeMeshOutputParam(
// pp is only removed later on, so sadly we have to keep it around for now
struct MeshOutputSpecializationCondition : FunctionCallSpecializeCondition
{
- bool doesParamWantSpecialization(IRParam*, IRInst* arg) { return arg == g; }
+ bool doesParamWantSpecialization(IRParam*, IRInst* arg, IRCall* /*call*/)
+ {
+ return arg == g;
+ }
IRInst* g;
} condition;
condition.g = g;
diff --git a/source/slang/slang-ir-metal-legalize.cpp b/source/slang/slang-ir-metal-legalize.cpp
index e66617e72..e91da136a 100644
--- a/source/slang/slang-ir-metal-legalize.cpp
+++ b/source/slang/slang-ir-metal-legalize.cpp
@@ -172,7 +172,7 @@ struct MetalAddressSpaceAssigner : InitialAddressSpaceAssigner
{
if (ptrType->hasAddressSpace())
return ptrType->getAddressSpace();
- return AddressSpace::Global;
+ return AddressSpace::Generic;
}
return AddressSpace::Generic;
}
diff --git a/source/slang/slang-ir-specialize-address-space.cpp b/source/slang/slang-ir-specialize-address-space.cpp
index c4a155eec..04792bd8b 100644
--- a/source/slang/slang-ir-specialize-address-space.cpp
+++ b/source/slang/slang-ir-specialize-address-space.cpp
@@ -131,7 +131,6 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext
bool processFunction(IRFunc* func)
{
bool retValAddrSpaceChanged = false;
- Dictionary<IRInst*, AddressSpace> mapVarValueToAddrSpace;
bool changed = true;
while (changed)
{
@@ -152,18 +151,23 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext
continue;
}
- // If the inst already has a pointer type with explicit address space, then use
- // it.
- if (auto ptrType = as<IRPtrTypeBase>(inst->getDataType()))
+ // If the inst already has a pointer/pointer-like type with explicit address
+ // space, then use it.
+ auto addrSpaceFromType =
+ addrSpaceAssigner->getAddressSpaceFromVarType(inst->getDataType());
+ if (addrSpaceFromType != AddressSpace::Generic)
{
- if (ptrType->hasAddressSpace())
- {
- mapInstToAddrSpace[inst] = ptrType->getAddressSpace();
+ mapInstToAddrSpace[inst] = addrSpaceFromType;
+ changed = true;
+
+ // Don't return early if the inst itself is a call, as we may still need to
+ // specialize it down below.
+ if (inst->getOp() != kIROp_Call)
continue;
- }
}
- // Otherwise, try to assign an address space based on the instruction type.
+ // Try to assign an address space based on the instruction type, and specialize
+ // calls.
switch (inst->getOp())
{
case kIROp_Var:
@@ -195,15 +199,6 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext
}
break;
case kIROp_Store:
- {
- auto addrSpace = getAddrSpace(inst->getOperand(1));
- if (addrSpace != AddressSpace::Generic)
- {
- mapVarValueToAddrSpace[inst->getOperand(0)] = addrSpace;
- mapInstToAddrSpace[inst] = addrSpace;
- changed = true;
- }
- }
break;
case kIROp_Param:
if (!isFirstBlock)
@@ -243,8 +238,9 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext
for (UInt i = 0; i < callInst->getArgCount(); i++)
{
auto arg = callInst->getArg(i);
- argAddrSpaces.add(getAddrSpace(arg));
- if (as<IRPtrTypeBase>(arg->getDataType()))
+ auto addrSpace = getAddrSpace(arg);
+ argAddrSpaces.add(addrSpace);
+ if (addrSpace != AddressSpace::Generic)
{
hasSpecializableArg = true;
}
@@ -477,8 +473,13 @@ void propagateAddressSpaceFromInsts(List<IRInst*>&& workList)
}
}
-AddressSpace NoOpInitialAddressSpaceAssigner::getAddressSpaceFromVarType(IRInst*)
+AddressSpace NoOpInitialAddressSpaceAssigner::getAddressSpaceFromVarType(IRInst* type)
{
+ if (auto ptrType = as<IRPtrTypeBase>(type))
+ {
+ if (ptrType->hasAddressSpace())
+ return ptrType->getAddressSpace();
+ }
return AddressSpace::Generic;
}
diff --git a/source/slang/slang-ir-specialize-arrays.cpp b/source/slang/slang-ir-specialize-arrays.cpp
index 4a4a72ee9..edb6cfa28 100644
--- a/source/slang/slang-ir-specialize-arrays.cpp
+++ b/source/slang/slang-ir-specialize-arrays.cpp
@@ -11,38 +11,14 @@ namespace Slang
struct ArrayParameterSpecializationCondition : FunctionCallSpecializeCondition
{
// This pass is intended to specialize functions
- // with struct parameters that has array fields
- // to avoid performance problems for GLSL targets.
- // Returns true if `type` is an `IRStructType` with array-typed fields.
- // It will also specialize functions with unsized array parameters into
- // sized arrays, if the function is called with an argument that has a
- // sized array type.
+ // with unsized array parameter called with a sized-array argument.
//
- bool isStructTypeWithArray(IRType* type)
- {
- if (auto structType = as<IRStructType>(type))
- {
- for (auto field : structType->getFields())
- {
- if (const auto arrayType = as<IRArrayType>(field->getFieldType()))
- {
- return true;
- }
- if (auto subStructType = as<IRStructType>(field->getFieldType()))
- {
- if (isStructTypeWithArray(subStructType))
- return true;
- }
- }
- }
- return false;
- }
- bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst)
{
+ SLANG_UNUSED(param);
SLANG_UNUSED(arg);
- if (isKhronosTarget(codeGenContext->getTargetReq()))
- return isStructTypeWithArray(param->getDataType());
+ SLANG_UNUSED(callInst);
return false;
}
diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
index 905f2e058..a5a3dd2d9 100644
--- a/source/slang/slang-ir-specialize-buffer-load-arg.cpp
+++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp
@@ -1,8 +1,11 @@
// slang-ir-specialize-buffer-load-arg.cpp
#include "slang-ir-specialize-buffer-load-arg.h"
+#include "slang-ir-defer-buffer-load.h"
#include "slang-ir-insts.h"
+#include "slang-ir-layout.h"
#include "slang-ir-specialize-function-call.h"
+#include "slang-ir-util.h"
#include "slang-ir.h"
namespace Slang
@@ -17,76 +20,115 @@ namespace Slang
// As swith most of our IR passes, we encapsulate the logic here in a context
// type so that the data that needs to be shared throughout the pass can
// be conveniently scoped.
+//
+
+// Note that this pass also ensures other more contrived cases are properly
+// handled. For example:
+//
+// * A load of a large structure from field in a constant buffer, so that
+// the value loaded is not the entire buffer contents.
+//
+// * A load of a large structure from a structured buffer, or any other kind
+// of buffer that requires an index.
+//
struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition
{
typedef FunctionCallSpecializeCondition Super;
- virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ CodeGenContext* codegenContext;
+
+ virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst)
{
// We only want to specialize for `struct` types and not base types.
//
- // TODO: We might want to consider some criteria here for the "large-ness"
- // of a structure (in terms of bytes and/or fields), so that we don't
- // eliminate loads of sufficiently small types (which are cheap to pass
- // by value).
- //
- auto paramType = param->getDataType();
- if (!as<IRStructType>(paramType))
+ auto paramType = (IRType*)unwrapAttributedType(param->getDataType());
+ if (!isTypePreferrableToDeferLoad(codegenContext, paramType))
return false;
- // We also only want to specialize for arguments that are a load
- // from some kind of global shader parameter.
+ // We want to handle loads from arbitrary access chains rooting from a shader parameter.
//
IRInst* a = arg;
- if (auto argLoad = as<IRLoad>(arg))
- {
- a = argLoad->getPtr();
- }
- else
+ for (;;)
{
- return false;
- }
+ // A user pointer can be directly passed into the function, so we no
+ // longer need to trace up further.
+ if (isUserPointerType(a->getDataType()))
+ break;
- // We want to handle loads from a shader parameter that is an array
- // of buffers, and not just a single global buffer.
- //
- while (auto argGetElement = as<IRGetElement>(a))
- {
- a = argGetElement->getBase();
+ if (auto argGetElement = as<IRGetElement>(a))
+ {
+ a = argGetElement->getBase();
+ }
+ else if (auto argSbLoad = as<IRStructuredBufferLoad>(a))
+ {
+ a = argSbLoad->getOperand(0);
+ }
+ else if (auto argBbLoad = as<IRByteAddressBufferLoad>(a))
+ {
+ a = argBbLoad->getOperand(0);
+ }
+ else if (auto argFieldExtract = as<IRFieldExtract>(a))
+ {
+ a = argFieldExtract->getBase();
+ }
+ else if (auto argGetElementPtr = as<IRGetElementPtr>(a))
+ {
+ a = argGetElementPtr->getBase();
+ }
+ else if (auto argSBGetElementPtr = as<IRRWStructuredBufferGetElementPtr>(a))
+ {
+ a = argSBGetElementPtr->getBase();
+ }
+ else if (auto argFieldAddr = as<IRFieldAddress>(a))
+ {
+ a = argFieldAddr->getBase();
+ }
+ else if (auto argLoad = as<IRLoad>(a))
+ {
+ a = argLoad->getPtr();
+
+ // We can safely defer a load to the callee if the source dest is immutable.
+ if (isPointerToImmutableLocation(a))
+ continue;
+
+ // Otherwise, we check if there is no other instructions in between the load and the
+ // call that can modify the memory location. If so, we can still safely defer the
+ // load to the callee.
+ if (!isMemoryLocationUnmodifiedBetweenLoadAndUser(
+ codegenContext->getTargetReq(),
+ argLoad,
+ callInst))
+ return false;
+ }
+ else
+ {
+ break;
+ }
}
- // The "root" of the parameter must be a reference to a global-scope
- // shader parameter, so that we know we can substitute it into the callee.
+ // The "root" of the parameter must be one of the following:
+ // 1. A reference to a global-scope shader parameter that can be referenced directly from
+ // the callee.
+ // 2. A user pointer or bindless resource handle that can be passed to the callee as
+ // ordinary argument.
//
if (const auto argGlobalParam = as<IRGlobalParam>(a))
{
return true;
}
- else
+ else if (isUserPointerType(a->getDataType()) || as<IRCastDescriptorHandleToResource>(a))
{
- return false;
+ return true;
}
-
- // TODO: There are other patterns that we could attempt to optimize here.
- // For example, this logic only handles loads of the *entire* contents of
- // a buffer, so it would miss:
- //
- // * A load of a large structure from field in a constant buffer, so that
- // the value loaded is not the entire buffer contents.
- //
- // * A load of a large structure from a structured buffer, or any other kind
- // of buffer that requires an index.
- //
- // * Any resource load that is not expressed at the IR level with a `load`
- // instruction (e.g., those that might use an intrinsic function).
- //
+ return false;
}
};
void specializeFuncsForBufferLoadArgs(CodeGenContext* codegenContext, IRModule* module)
{
FuncBufferLoadSpecializationCondition condition;
+ condition.codegenContext = codegenContext;
specializeFunctionCalls(codegenContext, module, &condition);
}
diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp
index 7c82891a6..aead69258 100644
--- a/source/slang/slang-ir-specialize-function-call.cpp
+++ b/source/slang/slang-ir-specialize-function-call.cpp
@@ -40,6 +40,12 @@ bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization(
if (as<IRGlobalValueWithCode>(arg))
return true;
+ if (isUserPointerType(arg->getDataType()))
+ return true;
+
+ if (as<IRCastDescriptorHandleToResource>(arg))
+ return true;
+
// As we will see later, we can also
// specialize a call when the argument
// is the result of indexing into an
@@ -47,17 +53,29 @@ bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization(
// of the indexing operation is also
// suitable for specialization.
//
- if (arg->getOp() == kIROp_GetElement || arg->getOp() == kIROp_Load)
+ switch (arg->getOp())
{
- auto base = arg->getOperand(0);
-
- // We will "recurse" on the base of
- // the indexing operation by continuing
- // our loop with the `base` as our new
- // argument.
- //
- arg = base;
- continue;
+ case kIROp_GetElement:
+ case kIROp_StructuredBufferLoad:
+ case kIROp_ByteAddressBufferLoad:
+ case kIROp_GetElementPtr:
+ case kIROp_RWStructuredBufferGetElementPtr:
+ case kIROp_FieldAddress:
+ case kIROp_FieldExtract:
+ case kIROp_Load:
+ {
+ auto base = arg->getOperand(0);
+
+ // We will "recurse" on the base of
+ // the indexing operation by continuing
+ // our loop with the `base` as our new
+ // argument.
+ //
+ arg = base;
+ continue;
+ }
+ default:
+ break;
}
// By default, we will *not* consider an argument
@@ -225,7 +243,7 @@ struct FunctionParameterSpecializationContext
// If neither the parameter nor the argument wants specialization,
// then we need to keep looking.
//
- auto paramWantSpecialization = doesParamWantSpecialization(param, arg);
+ auto paramWantSpecialization = doesParamWantSpecialization(param, arg, call);
auto paramTypeWantSpecialization = doesParamTypeWantSpecialization(param, arg);
if (!paramWantSpecialization && !paramTypeWantSpecialization)
continue;
@@ -255,9 +273,9 @@ struct FunctionParameterSpecializationContext
// Of course, now we need to back-fill the predicates that
// the above function used to evaluate prameters and arguments.
- bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst)
{
- return condition->doesParamWantSpecialization(param, arg);
+ return condition->doesParamWantSpecialization(param, arg, callInst);
}
bool doesParamTypeWantSpecialization(IRParam* param, IRInst* arg)
@@ -484,16 +502,20 @@ struct FunctionParameterSpecializationContext
UInt oldArgIndex = oldArgCounter++;
auto oldArg = oldCall->getArg(oldArgIndex);
- getCallInfoForParam(callInfo, oldParam, oldArg);
+ getCallInfoForParam(callInfo, oldParam, oldArg, oldCall);
}
}
- void getCallInfoForParam(CallSpecializationInfo& ioInfo, IRParam* oldParam, IRInst* oldArg)
+ void getCallInfoForParam(
+ CallSpecializationInfo& ioInfo,
+ IRParam* oldParam,
+ IRInst* oldArg,
+ IRCall* callInst)
{
// We know that the case where the parameter
// and argument don't want specialization is easy.
//
- if (!doesParamWantSpecialization(oldParam, oldArg))
+ if (!doesParamWantSpecialization(oldParam, oldArg, callInst))
{
// The new call site will use the same argument
// value as the old one, and we don't need
@@ -546,7 +568,15 @@ struct FunctionParameterSpecializationContext
// Similarly for other global constants
ioInfo.key.vals.add(globalConstant);
}
- else if (oldArg->getOp() == kIROp_GetElement)
+ else if (isUserPointerType(oldArg->getDataType()))
+ {
+ // If the arg is a user pointer, we can pass it as an ordinary argument,
+ // and we won't need further tracing down the access chain.
+ //
+ ioInfo.key.vals.add(oldArg->getFullType());
+ ioInfo.newArgs.add(oldArg);
+ }
+ else if (isElementAccessInst(oldArg))
{
// This is the case where the `oldArg` is
// in the form `oldBase[oldIndex]`
@@ -587,19 +617,45 @@ struct FunctionParameterSpecializationContext
ioInfo.newArgs.add(oldIndex);
}
+ else if (isFieldAccessInst(oldArg))
+ {
+ // This is the case where the `oldArg` is
+ // in the form `oldBase.structKey`
+ //
+ auto oldBase = oldArg->getOperand(0);
+ auto structKey = oldArg->getOperand(1);
+
+ // Similar to the getElement case, we recursively setting up whatever
+ // `oldBase` needs first.
+ //
+ getCallInfoForArg(ioInfo, oldBase);
+
+ // The main difference from the `getElement` case is we actually want
+ // the structKey to be in the specialization key because it will be baked
+ // into the specialized function.
+ // And we won't introduce a new parameter to hold the index.
+ //
+ ioInfo.key.vals.add(structKey);
+ }
else if (oldArg->getOp() == kIROp_Load)
{
auto oldBase = oldArg->getOperand(0);
getCallInfoForArg(ioInfo, oldBase);
}
+ else if (oldArg->getOp() == kIROp_CastDescriptorHandleToResource)
+ {
+ // We are accessing a resource from a bindless handle.
+ // We can stop recursion here and just pass in the bindless handle as
+ // an argument.
+ auto oldBase = oldArg->getOperand(0);
+ ioInfo.key.vals.add(oldBase->getFullType());
+ ioInfo.newArgs.add(oldBase);
+ }
else
{
// If we fail to match any of the cases above
- // then a precondition was violated in that
- // `isArgSuitableForSpecialization` is allowing
- // a case that this routine is not covering.
- //
- SLANG_UNEXPECTED("mising case in 'getCallInfoForArg'");
+ // then the `SpecializeCondition` is letting through constructs that we cannot handle.
+ SLANG_UNEXPECTED("unexpected function call specialization argument form.");
}
}
@@ -641,7 +697,7 @@ struct FunctionParameterSpecializationContext
// will stand in for the parameter in the specialized
// function.
//
- auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg);
+ auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg, oldCall);
// We will collect the replacement value to use
// for each of the original parameters in an array.
@@ -681,12 +737,13 @@ struct FunctionParameterSpecializationContext
IRInst* getSpecializedValueForParam(
FuncSpecializationInfo& ioInfo,
IRParam* oldParam,
- IRInst* oldArg)
+ IRInst* oldArg,
+ IRCall* callInst)
{
// As always, the easy case is when the parameter of
// the original function doesn't need specialization.
//
- if (!doesParamWantSpecialization(oldParam, oldArg))
+ if (!doesParamWantSpecialization(oldParam, oldArg, callInst))
{
// The specialized callee will need a new parameter
// that fills the same role as the old one, so we
@@ -718,6 +775,36 @@ struct FunctionParameterSpecializationContext
}
}
+ // Returns true if `inst` is an instruction that accesses an element from an array or a buffer.
+ //
+ static bool isElementAccessInst(IRInst* inst)
+ {
+ switch (inst->getOp())
+ {
+ case kIROp_GetElementPtr:
+ case kIROp_GetElement:
+ case kIROp_RWStructuredBufferGetElementPtr:
+ case kIROp_StructuredBufferLoad:
+ case kIROp_ByteAddressBufferLoad:
+ return true;
+ }
+ return false;
+ }
+
+ // Returns true if `inst` is an instruction that accesses a field from a struct, that is
+ // either a FieldAddress or FieldExtract.
+ //
+ static bool isFieldAccessInst(IRInst* inst)
+ {
+ switch (inst->getOp())
+ {
+ case kIROp_FieldAddress:
+ case kIROp_FieldExtract:
+ return true;
+ }
+ return false;
+ }
+
IRInst* getSpecializedValueForArg(FuncSpecializationInfo& ioInfo, IRInst* oldArg)
{
// The logic here parallels `gatherCallInfoForArg`,
@@ -735,13 +822,24 @@ struct FunctionParameterSpecializationContext
//
return globalParam;
}
+ if (isUserPointerType(oldArg->getDataType()))
+ {
+ // If argument is a user pointer, we can pass it into the callee
+ // directly as an oridinary argument without further specializing
+ // for the access chain beyond the pointer.
+ //
+ auto builder = getBuilder();
+ auto newParam = builder->createParam(oldArg->getFullType());
+ ioInfo.newParams.add(newParam);
+ return newParam;
+ }
if (auto globalFunc = as<IRGlobalValueWithCode>(oldArg))
{
// As above, the identity of the specialized function is sufficient
// to resolve the uses
return globalFunc;
}
- else if (oldArg->getOp() == kIROp_GetElement)
+ else if (isElementAccessInst(oldArg))
{
// This is the case where the argument is
// in the form `oldBase[oldIndex]`.
@@ -801,7 +899,9 @@ struct FunctionParameterSpecializationContext
// of things, and then inserted to a more permanent location later.
//
builder->setInsertLoc(IRInsertLoc());
- auto newVal = builder->emitElementExtract(oldArg->getFullType(), newBase, newIndex);
+ IRInst* newOperands[] = {newBase, newIndex};
+ auto newVal =
+ builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands);
// Because our new instruction wasn't
// actually inserted anywhere, we need to
@@ -813,6 +913,30 @@ struct FunctionParameterSpecializationContext
return newVal;
}
+ else if (isFieldAccessInst(oldArg))
+ {
+ // This is the case where the argument is
+ // in the form `oldBase.structKey`.
+ //
+ auto oldBase = oldArg->getOperand(0);
+ auto structKey = oldArg->getOperand(1);
+
+ // We handle this case in a similar way as the `oldBase[oldIndex]`
+ // case, except that we don't need to introduce a new parameter
+ // for the index, since the struct key is known at compile-time.
+ auto newBase = getSpecializedValueForArg(ioInfo, oldBase);
+
+ auto builder = getBuilder();
+
+ builder->setInsertLoc(IRInsertLoc());
+ IRInst* newOperands[] = {newBase, structKey};
+ auto newVal =
+ builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands);
+
+ ioInfo.newBodyInsts.add(newVal);
+
+ return newVal;
+ }
else if (auto oldArgLoad = as<IRLoad>(oldArg))
{
auto oldPtr = oldArgLoad->getPtr();
@@ -825,15 +949,30 @@ struct FunctionParameterSpecializationContext
return newVal;
}
+ else if (auto castHandleToResource = as<IRCastDescriptorHandleToResource>(oldArg))
+ {
+ // We are accessing a resource from a bindless handle.
+ // We should create a param for the handle, and load the resource from the param.
+ auto builder = getBuilder();
+ auto oldHandle = castHandleToResource->getOperand(0);
+ auto newHandle = builder->createParam(oldHandle->getFullType());
+ ioInfo.newParams.add(newHandle);
+
+ builder->setInsertLoc(IRInsertLoc());
+ IRInst* newOperands[] = {newHandle};
+ auto newVal = builder->emitIntrinsicInst(
+ oldArg->getFullType(),
+ kIROp_CastDescriptorHandleToResource,
+ 1,
+ newOperands);
+ ioInfo.newBodyInsts.add(newVal);
+ return newVal;
+ }
else
{
// If we don't match one of the above cases,
- // then `isArgSuitableForSpecialization` is
- // letting through cases that this function
- // hasn't been updated to handle.
- //
- SLANG_UNEXPECTED("mising case in 'getSpecializedValueForArg'");
- UNREACHABLE_RETURN(nullptr);
+ // then we are running into an invalid case.
+ SLANG_UNEXPECTED("unknown argument form for function call specialization.");
}
}
diff --git a/source/slang/slang-ir-specialize-function-call.h b/source/slang/slang-ir-specialize-function-call.h
index bab4ce2f4..afb8c2365 100644
--- a/source/slang/slang-ir-specialize-function-call.h
+++ b/source/slang/slang-ir-specialize-function-call.h
@@ -7,12 +7,14 @@ struct CodeGenContext;
struct IRInst;
struct IRModule;
struct IRParam;
+struct IRCall;
+
class Module;
class FunctionCallSpecializeCondition
{
public:
- virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) = 0;
+ virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) = 0;
virtual bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg);
diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp
index 871ba2c24..0ac08236f 100644
--- a/source/slang/slang-ir-specialize-resources.cpp
+++ b/source/slang/slang-ir-specialize-resources.cpp
@@ -20,9 +20,10 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio
TargetRequest* targetRequest = nullptr;
TargetProgram* targetProgram = nullptr;
- bool doesParamWantSpecialization(IRParam* param, IRInst* arg)
+ bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst)
{
SLANG_UNUSED(arg);
+ SLANG_UNUSED(callInst);
// Whether or not a parameter needs specialization is really
// a function of its type:
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 8584ea95e..551a72fc7 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -17,6 +17,14 @@ bool isPointerOfType(IRInst* type, IROp opCode)
return false;
}
+bool isUserPointerType(IRInst* type)
+{
+ auto ptrType = as<IRPtrType>(type);
+ if (!ptrType)
+ return false;
+ return ptrType->getAddressSpace() == AddressSpace::UserPointer;
+}
+
IRType* getVectorElementType(IRType* type)
{
if (auto vectorType = as<IRVectorType>(type))
@@ -792,35 +800,212 @@ IRInst* getRootAddr(IRInst* addr, List<IRInst*>& outAccessChain, List<IRInst*>*
return addr;
}
-// A simple and conservative address aliasing check.
-bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2)
+IRInst* getRootBufferOrAddr(IRInst* addr)
{
- if (addr1 == addr2)
- return true;
+ auto rootAddr = getRootAddr(addr);
+ if (as<IRRWStructuredBufferGetElementPtr>(rootAddr))
+ {
+ auto bufferHandle = rootAddr->getOperand(0);
+ // Check if the bufferHandle itself is a load from a global parameter.
+ if (auto load = as<IRLoad>(bufferHandle))
+ {
+ auto newRoot = getRootAddr(load->getPtr());
+ if (newRoot->getOp() == kIROp_GlobalParam)
+ return newRoot;
+ }
+ }
+ return rootAddr;
+}
+
+// The aliasing class of an address. This is used to determine
+// if two addresses may alias.
+enum class AddressAliasingClass
+{
+ Unknown,
+ UserPointer, // A user pointer into global memory
+ Var, // A thread-local or groupshared var.
+ ConstantBuffer, // A constant buffer or parameter block.
+ BoundBuffer, // A bound buffer.
+ BoundTexture, // A bound texture resource.
+ DescriptorHandle, // A bindless buffer or resource.
+};
+
+AddressAliasingClass getAliasingClass(IRInst* addr)
+{
+ if (auto globalParam = as<IRGlobalParam>(addr))
+ {
+ auto type = unwrapArray(globalParam->getDataType());
+ if (!type)
+ return AddressAliasingClass::Unknown;
+ switch (type->getOp())
+ {
+ case kIROp_TextureType:
+ return AddressAliasingClass::BoundTexture;
+ case kIROp_HLSLStructuredBufferType:
+ case kIROp_HLSLRWStructuredBufferType:
+ case kIROp_HLSLAppendStructuredBufferType:
+ case kIROp_HLSLConsumeStructuredBufferType:
+ case kIROp_HLSLRasterizerOrderedStructuredBufferType:
+ case kIROp_HLSLByteAddressBufferType:
+ case kIROp_HLSLRWByteAddressBufferType:
+ case kIROp_HLSLRasterizerOrderedByteAddressBufferType:
+ case kIROp_GLSLShaderStorageBufferType:
+ return AddressAliasingClass::BoundBuffer;
+ case kIROp_ConstantBufferType:
+ case kIROp_ParameterBlockType:
+ return AddressAliasingClass::ConstantBuffer;
+ case kIROp_PtrType:
+ if (isUserPointerType(type))
+ return AddressAliasingClass::UserPointer;
+ return AddressAliasingClass::Unknown;
+ case kIROp_DynamicResourceType:
+ return AddressAliasingClass::DescriptorHandle;
+ default:
+ return AddressAliasingClass::Unknown;
+ }
+ }
+ else if (as<IRVar>(addr))
+ return AddressAliasingClass::Var;
+ else if (as<IRGlobalVar>(addr))
+ return AddressAliasingClass::Var;
+ else if (as<IRRWStructuredBufferGetElementPtr>(addr))
+ return AddressAliasingClass::DescriptorHandle;
+ else if (as<IRCastDescriptorHandleToResource>(addr))
+ return AddressAliasingClass::DescriptorHandle;
- // Two variables can never alias.
- addr1 = getRootAddr(addr1);
- addr2 = getRootAddr(addr2);
+ auto type = addr->getDataType();
+ if (isUserPointerType(type))
+ return AddressAliasingClass::UserPointer;
+ return AddressAliasingClass::Unknown;
+}
- // Global addresses can alias with anything.
- if (!isChildInstOf(addr1, func))
+bool canAddrClassesAlias(AddressAliasingClass c1, AddressAliasingClass c2)
+{
+ if (c1 == AddressAliasingClass::Unknown || c2 == AddressAliasingClass::Unknown)
return true;
- if (!isChildInstOf(addr2, func))
+ switch (c1)
+ {
+ case AddressAliasingClass::Unknown:
return true;
+ case AddressAliasingClass::UserPointer:
+ case AddressAliasingClass::Var:
+ // A users pointer or var can only alias with another
+ // object that is either a user pointer or var.
+ //
+ // Generally, a var should never alias with anything else that isn't a var,
+ // if we never allow the user to take address of a local var.
+ // We don't allow taking addresses of a local var on most GPU targets, but
+ // we currently do expose an internal intrinsic to do so when targeting CPU.
+ // We should consider disallowing this across the board, or enable more aggresive
+ // criteria when targeting GPU backends.
+ // For now we stay conservative and just report true even when addr1 is var and
+ // addr2 is not rooted from a var.
+ //
+ return c2 == AddressAliasingClass::UserPointer || c2 == AddressAliasingClass::Var;
+ case AddressAliasingClass::BoundBuffer:
+ case AddressAliasingClass::BoundTexture:
+ // A bound resource can only alias with another
+ // object that is a bound resource or descriptor handle
+ return c2 == c1 || c2 == AddressAliasingClass::DescriptorHandle;
+
+ case AddressAliasingClass::DescriptorHandle:
+ // Can alias with any other resource.
+ switch (c2)
+ {
+ case AddressAliasingClass::BoundBuffer:
+ case AddressAliasingClass::BoundTexture:
+ case AddressAliasingClass::DescriptorHandle:
+ return true;
+ default:
+ return false;
+ }
+ case AddressAliasingClass::ConstantBuffer:
+ // Constant buffer cannot alias with anything.
+ return false;
+ }
+ // For any other unknown case, assume they may alias.
+ return true;
+}
+
+// Has `var` being used in a way that may allow it to alias with a user pointer?
+bool canVarAliasWithUserPointer(TargetRequest* target, IRInst* var)
+{
+ if (target && !isCPUTarget(target))
+ {
+ // We don't allow taking the address of a variable on anything other
+ // than the CPU target. Therefore a var can never alias with a user
+ // pointer on these targets.
+ return false;
+ }
+
+ SLANG_UNUSED(var);
+ return true;
+}
+
+// A simple and conservative address aliasing check.
+bool canAddressesPotentiallyAlias(
+ TargetRequest* target,
+ IRGlobalValueWithCode* func,
+ IRInst* addr1,
+ IRInst* addr2)
+{
+ if (addr1 == addr2)
+ return true;
+
+ addr1 = getRootBufferOrAddr(addr1);
+ addr2 = getRootBufferOrAddr(addr2);
+
+ auto addr1Class = getAliasingClass(addr1);
+ auto addr2Class = getAliasingClass(addr2);
- if (addr1->getOp() == kIROp_Var && addr2->getOp() == kIROp_Var && addr1 != addr2)
+ if (!canAddrClassesAlias(addr1Class, addr2Class))
return false;
+ if (addr1Class == addr2Class)
+ {
+ // For these classes of addresses, the identity of the root
+ // determines whether or not the addresse can alias.
+ // Note that we assume two different bound resources can never
+ // alias, and two different variables can never alias.
+ switch (addr1Class)
+ {
+ case AddressAliasingClass::Var:
+ case AddressAliasingClass::BoundBuffer:
+ case AddressAliasingClass::BoundTexture:
+ case AddressAliasingClass::ConstantBuffer:
+ if (addr1 != addr2)
+ return false;
+ break;
+ }
+ }
+
// A param and a var can never alias.
if (addr1->getOp() == kIROp_Param && addr1->getParent() == func->getFirstBlock() &&
addr2->getOp() == kIROp_Var ||
addr1->getOp() == kIROp_Var && addr2->getOp() == kIROp_Param &&
addr2->getParent() == func->getFirstBlock())
return false;
+
+ // If one addr is user pointer and one addr is a var,
+ // they can never alias, if the user code never took the address of
+ // the var.
+ if (addr1Class == AddressAliasingClass::Var && addr2Class == AddressAliasingClass::UserPointer)
+ {
+ return canVarAliasWithUserPointer(target, addr1);
+ }
+ if (addr2Class == AddressAliasingClass::Var && addr1Class == AddressAliasingClass::UserPointer)
+ {
+ return canVarAliasWithUserPointer(target, addr2);
+ }
return true;
}
+bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2)
+{
+ return canAddressesPotentiallyAlias(nullptr, func, addr1, addr2);
+}
+
bool isPtrLikeOrHandleType(IRInst* type)
{
if (!type)
@@ -1141,15 +1326,15 @@ bool areCallArgumentsSideEffectFree(IRCall* call, SideEffectAnalysisOptions opti
if (isBitSet(options, SideEffectAnalysisOptions::UseDominanceTree))
dom = module->findOrCreateDominatorTree(parentFunc);
- // If the pointer argument is a local variable (thus can't alias with other addresses)
- // and it is never read from in the function, we can safely treat the call as having
- // no side-effect.
- // This is a conservative test, but is sufficient to detect the most common case where
- // a temporary variable is used as the inout argument and the result stored in the temp
- // variable isn't being used elsewhere in the parent func.
+ // If the pointer argument is a local variable (thus can't alias with other
+ // addresses) and it is never read from in the function, we can safely treat the
+ // call as having no side-effect. This is a conservative test, but is sufficient to
+ // detect the most common case where a temporary variable is used as the inout
+ // argument and the result stored in the temp variable isn't being used elsewhere in
+ // the parent func.
//
- // A more aggresive test can check all other address uses reachable from the call site
- // and see if any of them are aliasing with the argument.
+ // A more aggresive test can check all other address uses reachable from the call
+ // site and see if any of them are aliasing with the argument.
for (auto use = arg->firstUse; use; use = use->nextUse)
{
if (as<IRDecoration>(use->getUser()))
@@ -1323,8 +1508,8 @@ bool doesCalleeHaveSideEffect(IRInst* callee)
}
}
- // If the callee has no side effect, check if any of its associated functions have side effect.
- // If so, we want to keep the callee around.
+ // If the callee has no side effect, check if any of its associated functions have side
+ // effect. If so, we want to keep the callee around.
//
// Typically, once the relevant pass has completed, the association is removed,
// and at that point we can remove the function.
@@ -2230,13 +2415,12 @@ void legalizeDefUse(IRGlobalValueWithCode* func)
!(as<IRVar>(inst) && loopHeaderBlockMap.containsKey(block)))
continue;
- // Normally, if the common dominator is not `block`, we can simply move the definition
- // to the common dominator.
- // An exception is when the common dominator is the target block of a
- // loop.
- // Another exception is when a var in the loop condition block is accessed both inside
- // and outside the loop. It is technically visible, but effects on the 'var' are not
- // visible outside the loop, so we'll need to hoist it out of the loop.
+ // Normally, if the common dominator is not `block`, we can simply move the
+ // definition to the common dominator. An exception is when the common dominator is
+ // the target block of a loop. Another exception is when a var in the loop condition
+ // block is accessed both inside and outside the loop. It is technically visible,
+ // but effects on the 'var' are not visible outside the loop, so we'll need to hoist
+ // it out of the loop.
//
// Note that after normalization, loops are in the form of:
// ```
@@ -2377,9 +2561,9 @@ bool canOperationBeSpecConst(IROp op, IRType* resultType, IRInst* const* fixedAr
// Returns true for ops that can be declared as an operation under `OpSpecConstantOp`.
//
// Integer arithmetic and comparison operations can be `OpSpecConstantOp` with the `Shader`
- // capability, while floating-point arithmetic and comparison operations require the `Kernel`
- // capability. We only support `Shader` capability for now, return false when floating-point
- // arithmetic/comparison is encountered.
+ // capability, while floating-point arithmetic and comparison operations require the
+ // `Kernel` capability. We only support `Shader` capability for now, return false when
+ // floating-point arithmetic/comparison is encountered.
switch (op)
{
case kIROp_Add:
diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h
index c0410fa3c..b8937d569 100644
--- a/source/slang/slang-ir-util.h
+++ b/source/slang/slang-ir-util.h
@@ -70,6 +70,8 @@ bool isPointerOfType(IRInst* ptrType, IRInst* elementType);
// True if ptrType is a pointer type to a type of opCode
bool isPointerOfType(IRInst* ptrType, IROp opCode);
+bool isUserPointerType(IRInst* type);
+
// Builds a dictionary that maps from requirement key to requirement value for `interfaceType`.
Dictionary<IRInst*, IRInst*> buildInterfaceRequirementDict(IRInterfaceType* interfaceType);
@@ -205,6 +207,12 @@ IRInst* getRootAddr(
bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2);
+bool canAddressesPotentiallyAlias(
+ TargetRequest* target,
+ IRGlobalValueWithCode* func,
+ IRInst* addr1,
+ IRInst* addr2);
+
String dumpIRToString(
IRInst* root,
IRDumpOptions options = {IRDumpOptions::Mode::Simplified, IRDumpOptions::Flag::DumpDebugIds});