diff options
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/slang-emit-wgsl.cpp | 5 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 8 | ||||
| -rw-r--r-- | source/slang/slang-ir-defer-buffer-load.cpp | 326 | ||||
| -rw-r--r-- | source/slang/slang-ir-defer-buffer-load.h | 22 | ||||
| -rw-r--r-- | source/slang/slang-ir-defunctionalization.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-glsl-legalize.cpp | 10 | ||||
| -rw-r--r-- | source/slang/slang-ir-metal-legalize.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-address-space.cpp | 43 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-arrays.cpp | 32 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-buffer-load-arg.cpp | 124 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-function-call.cpp | 205 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-function-call.h | 4 | ||||
| -rw-r--r-- | source/slang/slang-ir-specialize-resources.cpp | 3 | ||||
| -rw-r--r-- | source/slang/slang-ir-util.cpp | 246 | ||||
| -rw-r--r-- | source/slang/slang-ir-util.h | 8 |
15 files changed, 744 insertions, 296 deletions
diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp index 53c3aa487..b115c723a 100644 --- a/source/slang/slang-emit-wgsl.cpp +++ b/source/slang/slang-emit-wgsl.cpp @@ -295,6 +295,11 @@ void WGSLSourceEmitter::emitStructFieldAttributes( { SLANG_UNUSED(allowOffsetLayout); + // If the struct type is not used for physical storage, then we don't need to + // emit any layout attributes. + if (!structType->findDecoration<IRPhysicalTypeDecoration>()) + return; + // Tint emits errors unless we explicitly spell out the layout in some cases, so emit // offset and align attribtues for all fields. IRSizeAndAlignmentDecoration* const sizeAndAlignmentDecoration = diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index f1cc6090d..09c2efea9 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -1387,16 +1387,10 @@ Result linkAndOptimizeIR( specializeFuncsForBufferLoadArgs(codeGenContext, irModule); // Push `structuredBufferLoad` to the end of access chain to avoid loading unnecessary data. - if (isKhronosTarget(targetRequest) || isMetalTarget(targetRequest) || - isWGPUTarget(targetRequest)) - deferBufferLoad(irModule); + deferBufferLoad(codeGenContext, irModule); // We also want to specialize calls to functions that // takes unsized array parameters if possible. - // Moreover, for Khronos targets, we also want to specialize calls to functions - // that takes arrays/structs containing arrays as parameters with the actual - // global array object to avoid loading big arrays into SSA registers, which seems - // to cause performance issues. 
specializeArrayParameters(codeGenContext, irModule); #if 0 diff --git a/source/slang/slang-ir-defer-buffer-load.cpp b/source/slang/slang-ir-defer-buffer-load.cpp index 51c6a161b..ccdfe4538 100644 --- a/source/slang/slang-ir-defer-buffer-load.cpp +++ b/source/slang/slang-ir-defer-buffer-load.cpp @@ -3,142 +3,211 @@ #include "slang-ir-clone.h" #include "slang-ir-dominators.h" #include "slang-ir-insts.h" +#include "slang-ir-layout.h" #include "slang-ir-redundancy-removal.h" #include "slang-ir-util.h" #include "slang-ir.h" namespace Slang { -struct DeferBufferLoadContext -{ - // Map an original SSA value to a pointer that can be used to load the value. - Dictionary<IRInst*, IRInst*> mapValueToPtr; - // Map an ptr to its loaded value. - Dictionary<IRInst*, IRInst*> mapPtrToValue; +// Generally, we want to specialize arguments that are large in size, or arguments that +// are arrays or composite type that contains arrays. +// This is because: +// 1. Struct types without arrays will eventually be SROA's into registers and then effectively +// DCE'd, so they usually won't cause performance issues. In fact, front loading structs +// and reusing the loaded value instead of repetitively loading from constant memory is +// usually beneficial to performance. However large struct values can be SROA'd into a large +// number of registers, causing slow downstream compilation. Therefore we should avoid/defer +// loading them into registers if we can. +// 2. Arrays usually cannot be SROA'd into individual registers, which usually leads to +// large register consumption if they ever get loaded, so we want to defer loading array +// typed values as much as possible. - IRFunc* currentFunc = nullptr; +// If the argument data is bigger than this threshold, it is considered a large object +// and we will try to specialize it even if it doesn't contain arrays. 
+static const int kBufferLoadElementSizeSpecializationThreshold = 128; - // Ensure that for an original SSA value, we have formed a pointer that can be used to load the - // value. - IRInst* ensurePtr(IRInst* valueInst) - { - IRInst* result = nullptr; - if (mapValueToPtr.tryGetValue(valueInst, result)) - return result; +// If the argument data is smaller than this threshold, it is considered a tiny object +// and we will not consider specializing it, even if it contains arrays. +static const int kBufferLoadElementSizeSpecializationMinThreshold = 16; - IRBuilder b(valueInst); - b.setInsertBefore(valueInst); - - switch (valueInst->getOp()) +static bool isCompositeTypeContainingArrays(IRType* type) +{ + if (auto structType = as<IRStructType>(type)) + { + for (auto field : structType->getFields()) { - case kIROp_StructuredBufferLoad: - case kIROp_StructuredBufferLoadStatus: - { - result = b.emitRWStructuredBufferGetElementPtr( - valueInst->getOperand(0), - valueInst->getOperand(1)); - break; - } - case kIROp_GetElement: + if (const auto arrayType = as<IRArrayTypeBase>(field->getFieldType())) { - auto ptr = ensurePtr(valueInst->getOperand(0)); - if (!ptr) - return nullptr; - result = b.emitElementAddress(ptr, valueInst->getOperand(1)); - break; + return true; } - case kIROp_FieldExtract: + if (auto subStructType = as<IRStructType>(field->getFieldType())) { - auto ptr = ensurePtr(valueInst->getOperand(0)); - if (!ptr) - return nullptr; - result = b.emitFieldAddress(ptr, valueInst->getOperand(1)); - break; + if (isCompositeTypeContainingArrays(subStructType)) + return true; } - case kIROp_Load: - result = valueInst->getOperand(0); - break; - } - if (result) - { - mapValueToPtr[valueInst] = result; } - return result; } + else if (as<IRArrayTypeBase>(type)) + { + return true; + } + return false; +} - static bool isImmutableBufferLoad(IRInst* inst) +bool isTypePreferrableToDeferLoad(CodeGenContext* codeGenContext, IRType* type) +{ + // If parameter is a pointer/reference, we 
should consider specialize it. + if (as<IROutTypeBase>(type) || as<IRRefType>(type) || as<IRConstRefType>(type)) + return true; + + // We only want to defer loading values that are "large enough" that + // we expect them to be expensive to pass by value. + // + IRSizeAndAlignment sizeAlignment = {}; + if (SLANG_FAILED(getNaturalSizeAndAlignment( + codeGenContext->getTargetProgram()->getOptionSet(), + type, + &sizeAlignment))) { - // Note: we cannot defer loads from RWStructuredBuffer because there can be other - // instructions that modify the buffer. + // If type contains fields that we don't know how to compute natural size + // for, default to specialize if it contains arrays. + return isCompositeTypeContainingArrays(type); + } + + // If the argument is very small, don't bother specializing. + if (sizeAlignment.size <= kBufferLoadElementSizeSpecializationMinThreshold) + return false; + + // If the argument is somewhat small, don't specialize, unless it contains + // arrays. + if (sizeAlignment.size <= kBufferLoadElementSizeSpecializationThreshold) + { + // We generally do not specialize for small values, except it contains + // arrays that usually present a challenge for the SROA pass to eliminate + // unnecessary loads. + if (!isCompositeTypeContainingArrays(type)) + return false; + } + return true; +} + +// Returns true if memory loaded by `loadInst` is not modified before `userInst` after it is +// loaded. +// This method is currently implementing a very conservative analysis that only allows +// `loadInst` to be in the same block as `userInst`, with basic aliasing analysis for any +// stores in between. All other cases are conservatively treated as the memory location may be +// modified. +bool isMemoryLocationUnmodifiedBetweenLoadAndUser( + TargetRequest* target, + IRInst* loadInst, + IRInst* userInst) +{ + auto func = getParentFunc(loadInst); + if (!func) + return false; + + // For now we only check if loadInst and userInst are in the same block. 
+ if (loadInst->getParent() != userInst->getParent()) + return false; + + for (IRInst* inst = loadInst->getNextInst(); inst; inst = inst->getNextInst()) + { + // We found userInst before hitting any instruction that may modify the memory. + if (inst == userInst) + return true; + + if (!inst->mightHaveSideEffects()) + continue; + + // If we see any inst that has side effects, check if it is a simple case where we can rule + // out the possibility of modifying the memory location. switch (inst->getOp()) { - case kIROp_StructuredBufferLoad: - case kIROp_StructuredBufferLoadStatus: - return true; - case kIROp_Load: + case kIROp_Store: { - auto rootAddr = getRootAddr(inst->getOperand(0)); - return isPointerToImmutableLocation(rootAddr); + auto storedDest = inst->getOperand(0); + if (canAddressesPotentiallyAlias(target, func, loadInst->getOperand(0), storedDest)) + return false; + continue; } default: + // For any other case, conservatively assume the memory location may be modified. return false; } } + // We didn't find userInst after loadInst within the same basic block. + // We conservatively assume the memory location may be modified. + // This check can be extended to use the dominator tree to allow + // loadInst and userInst to be in different blocks. + return false; +} - // Ensure that for a pointer value, we have created a load instruction to materialize the value. - IRInst* materializePointer(IRBuilder& builder, IRInst* loadInst) +struct DeferBufferLoadContext +{ + CodeGenContext* codeGenContext; + + + void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst) { - auto ptr = ensurePtr(loadInst); - if (!ptr) - return nullptr; - IRInst* result = nullptr; - if (mapPtrToValue.tryGetValue(ptr, result)) - return result; - IRAlignedAttr* align = nullptr; - if (auto load = as<IRLoad>(loadInst)) - align = load->findAttr<IRAlignedAttr>(); - if (!as<IRModuleInst>(ptr->getParent())) + // Don't defer the load anymore if the type is simple. 
+ if (!isTypePreferrableToDeferLoad(codeGenContext, loadInst->getDataType()) || + loadInst->findAttr<IRAlignedAttr>()) { - setInsertAfterOrdinaryInst(&builder, ptr); - IRType* valueType = tryGetPointedToType(&builder, ptr->getFullType()); - result = builder.emitLoad(valueType, ptr, align); - mapPtrToValue[ptr] = result; + return; } - else + + auto rootAddr = getRootAddr(loadInst->getOperand(0)); + bool isImmutableBufferLoad = isPointerToImmutableLocation(rootAddr); + + // Don't defer the load if there are uses that are not getElement or fieldExtract. + // Because in this case we need to use the entire loaded value, and further deferring + // the load down any access chain will introduce redundant loads. + for (auto use = loadInst->firstUse; use; use = use->nextUse) { - setInsertBeforeOrdinaryInst(&builder, loadInst); - IRType* valueType = tryGetPointedToType(&builder, ptr->getFullType()); - result = builder.emitLoad(valueType, ptr, align); - // Since we are inserting the load in a local scope, we can't register - // the mapping to the pointer, since the global pointer needs to be - // loaded once per function. + auto user = use->getUser(); + switch (user->getOp()) + { + case kIROp_GetElement: + case kIROp_FieldExtract: + // Can we defer the load to load only the requested element right before + // the element extract inst? + // If the buffer is immutable, we can always do that. + // If it is not, we need to make sure there are no other instructions that can modify + // the buffer between the load and the use. + // + if (isImmutableBufferLoad) + continue; + if (isMemoryLocationUnmodifiedBetweenLoadAndUser( + codeGenContext->getTargetReq(), + loadInst, + user)) + continue; + return; + default: + // If we see any other use of the load instruction, we assume the entire loaded value + // is needed, and we can't defer the load anymore. 
+ return; + } } - return result; - } - static bool isSimpleType(IRInst* type) - { - if (auto modType = as<IRRateQualifiedType>(type)) - type = modType->getValueType(); - if (as<IRStructType>(type)) - return false; - if (as<IRTupleType>(type)) - return false; - if (as<IRArrayTypeBase>(type)) - return false; - return true; - } + // If we reach here, it means all uses are getElement or fieldExtract, and + // it is safe to defer the load down the access chain. - void deferBufferLoadInst(IRBuilder& builder, List<IRInst*>& workList, IRInst* loadInst) - { - // Don't defer the load anymore if the type is simple. - if (isSimpleType(loadInst->getDataType()) || loadInst->findAttr<IRAlignedAttr>()) + if (loadInst->getOp() == kIROp_StructuredBufferLoad) { - auto materializedVal = materializePointer(builder, loadInst); - loadInst->transferDecorationsTo(materializedVal); - loadInst->replaceUsesWith(materializedVal); - return; + // Convert the structuredBufferLoad to a regular load to reuse + // the same logic for deferring regular loads. + builder.setInsertBefore(loadInst); + auto bufferPtr = builder.emitRWStructuredBufferGetElementPtr( + loadInst->getOperand(0), + loadInst->getOperand(1)); + auto sbLoad = builder.emitLoad(bufferPtr); + loadInst->transferDecorationsTo(sbLoad); + loadInst->replaceUsesWith(sbLoad); + loadInst->removeAndDeallocate(); + loadInst = sbLoad; } // Otherwise, look for all uses and try to defer the load before actual use of the value. @@ -148,19 +217,29 @@ struct DeferBufferLoadContext loadInst, [&](IRUse* use) { - if (needMaterialize) - return; - auto user = use->getUser(); + switch (user->getOp()) { case kIROp_GetElement: case kIROp_FieldExtract: { - auto basePtr = ensurePtr(loadInst); - if (!basePtr) - return; - pendingWorkList.add(user); + // If we see a getElement or fieldExtract, we defer the load by + // replacing the getElement/fieldExtract with a load of the + // elementAddr/fieldAddr. 
+ builder.setInsertBefore(user); + auto basePtr = loadInst->getOperand(0); + IRInst* gepArg = user->getOperand(1); + auto elementPtr = builder.emitElementAddress( + basePtr, + makeArrayViewSingle<IRInst*>(gepArg)); + auto newLoad = builder.emitLoad(elementPtr); + user->transferDecorationsTo(newLoad); + user->replaceUsesWith(newLoad); + user->removeAndDeallocate(); + + // Now add the new load to work list to try to defer it further. + pendingWorkList.add(newLoad); } break; default: @@ -169,41 +248,37 @@ struct DeferBufferLoadContext } }); - if (needMaterialize) - { - auto val = materializePointer(builder, loadInst); - loadInst->transferDecorationsTo(val); - loadInst->replaceUsesWith(val); - loadInst->removeAndDeallocate(); - } - else - { - // Append to worklist in reverse order so we process the uses in natural appearance - // order. - for (Index i = pendingWorkList.getCount() - 1; i >= 0; i--) - workList.add(pendingWorkList[i]); - } + // Append to worklist in reverse order so we process the uses in natural appearance + // order. + for (Index i = pendingWorkList.getCount() - 1; i >= 0; i--) + workList.add(pendingWorkList[i]); } void deferBufferLoadInFunc(IRFunc* func) { removeRedundancyInFunc(func, false); - currentFunc = func; - List<IRInst*> workList; + // Discover all load instructions and add to work list. + for (auto block : func->getBlocks()) { for (auto inst : block->getChildren()) { - if (isImmutableBufferLoad(inst)) + switch (inst->getOp()) { + case kIROp_Load: + case kIROp_StructuredBufferLoad: + // Note: We don't handle `kIROp_StructuredBufferLoadStatus` here because + // it also writes to the status code out parameter, which we can't defer. workList.add(inst); + break; } } } + // Iteratively process the work list until it is empty. 
IRBuilder builder(func); for (Index i = 0; i < workList.getCount(); i++) { @@ -227,9 +302,10 @@ struct DeferBufferLoadContext } }; -void deferBufferLoad(IRModule* module) +void deferBufferLoad(CodeGenContext* codeGenContext, IRModule* module) { DeferBufferLoadContext context; + context.codeGenContext = codeGenContext; for (auto childInst : module->getGlobalInsts()) { if (auto code = as<IRGlobalValueWithCode>(childInst)) diff --git a/source/slang/slang-ir-defer-buffer-load.h b/source/slang/slang-ir-defer-buffer-load.h index b54271883..0f692b39a 100644 --- a/source/slang/slang-ir-defer-buffer-load.h +++ b/source/slang/slang-ir-defer-buffer-load.h @@ -4,9 +4,8 @@ namespace Slang { /* -This pass implements a targeted optimization that defers the loading of structured buffer elements -to the end of the access chain to avoid loading and repacking unnecessary data. -For example, if we see: +This pass implements an intra-function optimization that defers the loading of buffer +elements to the end of the access chain to avoid loading unnecessary data. For example, if we see: val = StructuredBufferLoad(s, i) val2 = GetElement(val, j) val3 = FieldExtract(val2, field_key_0) @@ -20,7 +19,22 @@ We should rewrite the code into: */ struct IRModule; +struct IRType; +struct CodeGenContext; +struct IRInst; +class TargetRequest; -void deferBufferLoad(IRModule* module); +void deferBufferLoad(CodeGenContext* context, IRModule* module); + +// Returns true if the type is suitable for defer-load optimization. +// Generally, we want to defer loading large structs or composites that contain arrays. +bool isTypePreferrableToDeferLoad(CodeGenContext* context, IRType* type); + +// Returns true if memory loaded by `loadInst` is not modified before `userInst` after it is +// loaded. 
+bool isMemoryLocationUnmodifiedBetweenLoadAndUser( + TargetRequest* target, + IRInst* loadInst, + IRInst* userInst); } // namespace Slang diff --git a/source/slang/slang-ir-defunctionalization.cpp b/source/slang/slang-ir-defunctionalization.cpp index af84ec78a..424971f90 100644 --- a/source/slang/slang-ir-defunctionalization.cpp +++ b/source/slang/slang-ir-defunctionalization.cpp @@ -12,7 +12,7 @@ struct FunctionParameterSpecializationCondition : FunctionCallSpecializeConditio { TargetRequest* targetRequest = nullptr; - bool doesParamWantSpecialization(IRParam* param, IRInst* /*arg*/) + bool doesParamWantSpecialization(IRParam* param, IRInst* /*arg*/, IRCall* /*callInst*/) { IRType* type = param->getDataType(); return as<IRFuncType>(type); diff --git a/source/slang/slang-ir-glsl-legalize.cpp b/source/slang/slang-ir-glsl-legalize.cpp index a79ca2379..d87d96da0 100644 --- a/source/slang/slang-ir-glsl-legalize.cpp +++ b/source/slang/slang-ir-glsl-legalize.cpp @@ -2694,7 +2694,10 @@ static void legalizeMeshPayloadInputParam( pp->replaceUsesWith(g); struct MeshPayloadInputSpecializationCondition : FunctionCallSpecializeCondition { - bool doesParamWantSpecialization(IRParam*, IRInst* arg) { return arg == g; } + bool doesParamWantSpecialization(IRParam*, IRInst* arg, IRCall* /*call*/) + { + return arg == g; + } IRInst* g; } condition; condition.g = g; @@ -2794,7 +2797,10 @@ static void legalizeMeshOutputParam( // pp is only removed later on, so sadly we have to keep it around for now struct MeshOutputSpecializationCondition : FunctionCallSpecializeCondition { - bool doesParamWantSpecialization(IRParam*, IRInst* arg) { return arg == g; } + bool doesParamWantSpecialization(IRParam*, IRInst* arg, IRCall* /*call*/) + { + return arg == g; + } IRInst* g; } condition; condition.g = g; diff --git a/source/slang/slang-ir-metal-legalize.cpp b/source/slang/slang-ir-metal-legalize.cpp index e66617e72..e91da136a 100644 --- a/source/slang/slang-ir-metal-legalize.cpp +++ 
b/source/slang/slang-ir-metal-legalize.cpp @@ -172,7 +172,7 @@ struct MetalAddressSpaceAssigner : InitialAddressSpaceAssigner { if (ptrType->hasAddressSpace()) return ptrType->getAddressSpace(); - return AddressSpace::Global; + return AddressSpace::Generic; } return AddressSpace::Generic; } diff --git a/source/slang/slang-ir-specialize-address-space.cpp b/source/slang/slang-ir-specialize-address-space.cpp index c4a155eec..04792bd8b 100644 --- a/source/slang/slang-ir-specialize-address-space.cpp +++ b/source/slang/slang-ir-specialize-address-space.cpp @@ -131,7 +131,6 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext bool processFunction(IRFunc* func) { bool retValAddrSpaceChanged = false; - Dictionary<IRInst*, AddressSpace> mapVarValueToAddrSpace; bool changed = true; while (changed) { @@ -152,18 +151,23 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext continue; } - // If the inst already has a pointer type with explicit address space, then use - // it. - if (auto ptrType = as<IRPtrTypeBase>(inst->getDataType())) + // If the inst already has a pointer/pointer-like type with explicit address + // space, then use it. + auto addrSpaceFromType = + addrSpaceAssigner->getAddressSpaceFromVarType(inst->getDataType()); + if (addrSpaceFromType != AddressSpace::Generic) { - if (ptrType->hasAddressSpace()) - { - mapInstToAddrSpace[inst] = ptrType->getAddressSpace(); + mapInstToAddrSpace[inst] = addrSpaceFromType; + changed = true; + + // Don't return early if the inst itself is a call, as we may still need to + // specialize it down below. + if (inst->getOp() != kIROp_Call) continue; - } } - // Otherwise, try to assign an address space based on the instruction type. + // Try to assign an address space based on the instruction type, and specialize + // calls. 
switch (inst->getOp()) { case kIROp_Var: @@ -195,15 +199,6 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext } break; case kIROp_Store: - { - auto addrSpace = getAddrSpace(inst->getOperand(1)); - if (addrSpace != AddressSpace::Generic) - { - mapVarValueToAddrSpace[inst->getOperand(0)] = addrSpace; - mapInstToAddrSpace[inst] = addrSpace; - changed = true; - } - } break; case kIROp_Param: if (!isFirstBlock) @@ -243,8 +238,9 @@ struct AddressSpaceContext : public AddressSpaceSpecializationContext for (UInt i = 0; i < callInst->getArgCount(); i++) { auto arg = callInst->getArg(i); - argAddrSpaces.add(getAddrSpace(arg)); - if (as<IRPtrTypeBase>(arg->getDataType())) + auto addrSpace = getAddrSpace(arg); + argAddrSpaces.add(addrSpace); + if (addrSpace != AddressSpace::Generic) { hasSpecializableArg = true; } @@ -477,8 +473,13 @@ void propagateAddressSpaceFromInsts(List<IRInst*>&& workList) } } -AddressSpace NoOpInitialAddressSpaceAssigner::getAddressSpaceFromVarType(IRInst*) +AddressSpace NoOpInitialAddressSpaceAssigner::getAddressSpaceFromVarType(IRInst* type) { + if (auto ptrType = as<IRPtrTypeBase>(type)) + { + if (ptrType->hasAddressSpace()) + return ptrType->getAddressSpace(); + } return AddressSpace::Generic; } diff --git a/source/slang/slang-ir-specialize-arrays.cpp b/source/slang/slang-ir-specialize-arrays.cpp index 4a4a72ee9..edb6cfa28 100644 --- a/source/slang/slang-ir-specialize-arrays.cpp +++ b/source/slang/slang-ir-specialize-arrays.cpp @@ -11,38 +11,14 @@ namespace Slang struct ArrayParameterSpecializationCondition : FunctionCallSpecializeCondition { // This pass is intended to specialize functions - // with struct parameters that has array fields - // to avoid performance problems for GLSL targets. - // Returns true if `type` is an `IRStructType` with array-typed fields. 
- // It will also specialize functions with unsized array parameters into - // sized arrays, if the function is called with an argument that has a - // sized array type. + // with unsized array parameter called with a sized-array argument. // - bool isStructTypeWithArray(IRType* type) - { - if (auto structType = as<IRStructType>(type)) - { - for (auto field : structType->getFields()) - { - if (const auto arrayType = as<IRArrayType>(field->getFieldType())) - { - return true; - } - if (auto subStructType = as<IRStructType>(field->getFieldType())) - { - if (isStructTypeWithArray(subStructType)) - return true; - } - } - } - return false; - } - bool doesParamWantSpecialization(IRParam* param, IRInst* arg) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) { + SLANG_UNUSED(param); SLANG_UNUSED(arg); - if (isKhronosTarget(codeGenContext->getTargetReq())) - return isStructTypeWithArray(param->getDataType()); + SLANG_UNUSED(callInst); return false; } diff --git a/source/slang/slang-ir-specialize-buffer-load-arg.cpp b/source/slang/slang-ir-specialize-buffer-load-arg.cpp index 905f2e058..a5a3dd2d9 100644 --- a/source/slang/slang-ir-specialize-buffer-load-arg.cpp +++ b/source/slang/slang-ir-specialize-buffer-load-arg.cpp @@ -1,8 +1,11 @@ // slang-ir-specialize-buffer-load-arg.cpp #include "slang-ir-specialize-buffer-load-arg.h" +#include "slang-ir-defer-buffer-load.h" #include "slang-ir-insts.h" +#include "slang-ir-layout.h" #include "slang-ir-specialize-function-call.h" +#include "slang-ir-util.h" #include "slang-ir.h" namespace Slang @@ -17,76 +20,115 @@ namespace Slang // As swith most of our IR passes, we encapsulate the logic here in a context // type so that the data that needs to be shared throughout the pass can // be conveniently scoped. +// + +// Note that this pass also ensures other more contrived cases are properly +// handled. 
For example: +// +// * A load of a large structure from field in a constant buffer, so that +// the value loaded is not the entire buffer contents. +// +// * A load of a large structure from a structured buffer, or any other kind +// of buffer that requires an index. +// struct FuncBufferLoadSpecializationCondition : FunctionCallSpecializeCondition { typedef FunctionCallSpecializeCondition Super; - virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) + CodeGenContext* codegenContext; + + virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) { // We only want to specialize for `struct` types and not base types. // - // TODO: We might want to consider some criteria here for the "large-ness" - // of a structure (in terms of bytes and/or fields), so that we don't - // eliminate loads of sufficiently small types (which are cheap to pass - // by value). - // - auto paramType = param->getDataType(); - if (!as<IRStructType>(paramType)) + auto paramType = (IRType*)unwrapAttributedType(param->getDataType()); + if (!isTypePreferrableToDeferLoad(codegenContext, paramType)) return false; - // We also only want to specialize for arguments that are a load - // from some kind of global shader parameter. + // We want to handle loads from arbitrary access chains rooting from a shader parameter. // IRInst* a = arg; - if (auto argLoad = as<IRLoad>(arg)) - { - a = argLoad->getPtr(); - } - else + for (;;) { - return false; - } + // A user pointer can be directly passed into the function, so we no + // longer need to trace up further. + if (isUserPointerType(a->getDataType())) + break; - // We want to handle loads from a shader parameter that is an array - // of buffers, and not just a single global buffer. 
- // - while (auto argGetElement = as<IRGetElement>(a)) - { - a = argGetElement->getBase(); + if (auto argGetElement = as<IRGetElement>(a)) + { + a = argGetElement->getBase(); + } + else if (auto argSbLoad = as<IRStructuredBufferLoad>(a)) + { + a = argSbLoad->getOperand(0); + } + else if (auto argBbLoad = as<IRByteAddressBufferLoad>(a)) + { + a = argBbLoad->getOperand(0); + } + else if (auto argFieldExtract = as<IRFieldExtract>(a)) + { + a = argFieldExtract->getBase(); + } + else if (auto argGetElementPtr = as<IRGetElementPtr>(a)) + { + a = argGetElementPtr->getBase(); + } + else if (auto argSBGetElementPtr = as<IRRWStructuredBufferGetElementPtr>(a)) + { + a = argSBGetElementPtr->getBase(); + } + else if (auto argFieldAddr = as<IRFieldAddress>(a)) + { + a = argFieldAddr->getBase(); + } + else if (auto argLoad = as<IRLoad>(a)) + { + a = argLoad->getPtr(); + + // We can safely defer a load to the callee if the source dest is immutable. + if (isPointerToImmutableLocation(a)) + continue; + + // Otherwise, we check if there is no other instructions in between the load and the + // call that can modify the memory location. If so, we can still safely defer the + // load to the callee. + if (!isMemoryLocationUnmodifiedBetweenLoadAndUser( + codegenContext->getTargetReq(), + argLoad, + callInst)) + return false; + } + else + { + break; + } } - // The "root" of the parameter must be a reference to a global-scope - // shader parameter, so that we know we can substitute it into the callee. + // The "root" of the parameter must be one of the following: + // 1. A reference to a global-scope shader parameter that can be referenced directly from + // the callee. + // 2. A user pointer or bindless resource handle that can be passed to the callee as + // ordinary argument. 
// if (const auto argGlobalParam = as<IRGlobalParam>(a)) { return true; } - else + else if (isUserPointerType(a->getDataType()) || as<IRCastDescriptorHandleToResource>(a)) { - return false; + return true; } - - // TODO: There are other patterns that we could attempt to optimize here. - // For example, this logic only handles loads of the *entire* contents of - // a buffer, so it would miss: - // - // * A load of a large structure from field in a constant buffer, so that - // the value loaded is not the entire buffer contents. - // - // * A load of a large structure from a structured buffer, or any other kind - // of buffer that requires an index. - // - // * Any resource load that is not expressed at the IR level with a `load` - // instruction (e.g., those that might use an intrinsic function). - // + return false; } }; void specializeFuncsForBufferLoadArgs(CodeGenContext* codegenContext, IRModule* module) { FuncBufferLoadSpecializationCondition condition; + condition.codegenContext = codegenContext; specializeFunctionCalls(codegenContext, module, &condition); } diff --git a/source/slang/slang-ir-specialize-function-call.cpp b/source/slang/slang-ir-specialize-function-call.cpp index 7c82891a6..aead69258 100644 --- a/source/slang/slang-ir-specialize-function-call.cpp +++ b/source/slang/slang-ir-specialize-function-call.cpp @@ -40,6 +40,12 @@ bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization( if (as<IRGlobalValueWithCode>(arg)) return true; + if (isUserPointerType(arg->getDataType())) + return true; + + if (as<IRCastDescriptorHandleToResource>(arg)) + return true; + // As we will see later, we can also // specialize a call when the argument // is the result of indexing into an @@ -47,17 +53,29 @@ bool FunctionCallSpecializeCondition::isParamSuitableForSpecialization( // of the indexing operation is also // suitable for specialization. 
// - if (arg->getOp() == kIROp_GetElement || arg->getOp() == kIROp_Load) + switch (arg->getOp()) { - auto base = arg->getOperand(0); - - // We will "recurse" on the base of - // the indexing operation by continuing - // our loop with the `base` as our new - // argument. - // - arg = base; - continue; + case kIROp_GetElement: + case kIROp_StructuredBufferLoad: + case kIROp_ByteAddressBufferLoad: + case kIROp_GetElementPtr: + case kIROp_RWStructuredBufferGetElementPtr: + case kIROp_FieldAddress: + case kIROp_FieldExtract: + case kIROp_Load: + { + auto base = arg->getOperand(0); + + // We will "recurse" on the base of + // the indexing operation by continuing + // our loop with the `base` as our new + // argument. + // + arg = base; + continue; + } + default: + break; } // By default, we will *not* consider an argument @@ -225,7 +243,7 @@ struct FunctionParameterSpecializationContext // If neither the parameter nor the argument wants specialization, // then we need to keep looking. // - auto paramWantSpecialization = doesParamWantSpecialization(param, arg); + auto paramWantSpecialization = doesParamWantSpecialization(param, arg, call); auto paramTypeWantSpecialization = doesParamTypeWantSpecialization(param, arg); if (!paramWantSpecialization && !paramTypeWantSpecialization) continue; @@ -255,9 +273,9 @@ struct FunctionParameterSpecializationContext // Of course, now we need to back-fill the predicates that // the above function used to evaluate prameters and arguments. 
- bool doesParamWantSpecialization(IRParam* param, IRInst* arg) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) { - return condition->doesParamWantSpecialization(param, arg); + return condition->doesParamWantSpecialization(param, arg, callInst); } bool doesParamTypeWantSpecialization(IRParam* param, IRInst* arg) @@ -484,16 +502,20 @@ struct FunctionParameterSpecializationContext UInt oldArgIndex = oldArgCounter++; auto oldArg = oldCall->getArg(oldArgIndex); - getCallInfoForParam(callInfo, oldParam, oldArg); + getCallInfoForParam(callInfo, oldParam, oldArg, oldCall); } } - void getCallInfoForParam(CallSpecializationInfo& ioInfo, IRParam* oldParam, IRInst* oldArg) + void getCallInfoForParam( + CallSpecializationInfo& ioInfo, + IRParam* oldParam, + IRInst* oldArg, + IRCall* callInst) { // We know that the case where the parameter // and argument don't want specialization is easy. // - if (!doesParamWantSpecialization(oldParam, oldArg)) + if (!doesParamWantSpecialization(oldParam, oldArg, callInst)) { // The new call site will use the same argument // value as the old one, and we don't need @@ -546,7 +568,15 @@ struct FunctionParameterSpecializationContext // Similarly for other global constants ioInfo.key.vals.add(globalConstant); } - else if (oldArg->getOp() == kIROp_GetElement) + else if (isUserPointerType(oldArg->getDataType())) + { + // If the arg is a user pointer, we can pass it as an ordinary argument, + // and we won't need further tracing down the access chain. 
+ // + ioInfo.key.vals.add(oldArg->getFullType()); + ioInfo.newArgs.add(oldArg); + } + else if (isElementAccessInst(oldArg)) { // This is the case where the `oldArg` is // in the form `oldBase[oldIndex]` @@ -587,19 +617,45 @@ struct FunctionParameterSpecializationContext ioInfo.newArgs.add(oldIndex); } + else if (isFieldAccessInst(oldArg)) + { + // This is the case where the `oldArg` is + // in the form `oldBase.structKey` + // + auto oldBase = oldArg->getOperand(0); + auto structKey = oldArg->getOperand(1); + + // Similar to the getElement case, we recursively setting up whatever + // `oldBase` needs first. + // + getCallInfoForArg(ioInfo, oldBase); + + // The main difference from the `getElement` case is we actually want + // the structKey to be in the specialization key because it will be baked + // into the specialized function. + // And we won't introduce a new parameter to hold the index. + // + ioInfo.key.vals.add(structKey); + } else if (oldArg->getOp() == kIROp_Load) { auto oldBase = oldArg->getOperand(0); getCallInfoForArg(ioInfo, oldBase); } + else if (oldArg->getOp() == kIROp_CastDescriptorHandleToResource) + { + // We are accessing a resource from a bindless handle. + // We can stop recursion here and just pass in the bindless handle as + // an argument. + auto oldBase = oldArg->getOperand(0); + ioInfo.key.vals.add(oldBase->getFullType()); + ioInfo.newArgs.add(oldBase); + } else { // If we fail to match any of the cases above - // then a precondition was violated in that - // `isArgSuitableForSpecialization` is allowing - // a case that this routine is not covering. - // - SLANG_UNEXPECTED("mising case in 'getCallInfoForArg'"); + // then the `SpecializeCondition` is letting through constructs that we cannot handle. + SLANG_UNEXPECTED("unexpected function call specialization argument form."); } } @@ -641,7 +697,7 @@ struct FunctionParameterSpecializationContext // will stand in for the parameter in the specialized // function. 
// - auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg); + auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg, oldCall); // We will collect the replacement value to use // for each of the original parameters in an array. @@ -681,12 +737,13 @@ struct FunctionParameterSpecializationContext IRInst* getSpecializedValueForParam( FuncSpecializationInfo& ioInfo, IRParam* oldParam, - IRInst* oldArg) + IRInst* oldArg, + IRCall* callInst) { // As always, the easy case is when the parameter of // the original function doesn't need specialization. // - if (!doesParamWantSpecialization(oldParam, oldArg)) + if (!doesParamWantSpecialization(oldParam, oldArg, callInst)) { // The specialized callee will need a new parameter // that fills the same role as the old one, so we @@ -718,6 +775,36 @@ struct FunctionParameterSpecializationContext } } + // Returns true if `inst` is an instruction that accesses an element from an array or a buffer. + // + static bool isElementAccessInst(IRInst* inst) + { + switch (inst->getOp()) + { + case kIROp_GetElementPtr: + case kIROp_GetElement: + case kIROp_RWStructuredBufferGetElementPtr: + case kIROp_StructuredBufferLoad: + case kIROp_ByteAddressBufferLoad: + return true; + } + return false; + } + + // Returns true if `inst` is an instruction that accesses a field from a struct, that is + // either a FieldAddress or FieldExtract. 
+ // + static bool isFieldAccessInst(IRInst* inst) + { + switch (inst->getOp()) + { + case kIROp_FieldAddress: + case kIROp_FieldExtract: + return true; + } + return false; + } + IRInst* getSpecializedValueForArg(FuncSpecializationInfo& ioInfo, IRInst* oldArg) { // The logic here parallels `gatherCallInfoForArg`, @@ -735,13 +822,24 @@ struct FunctionParameterSpecializationContext // return globalParam; } + if (isUserPointerType(oldArg->getDataType())) + { + // If argument is a user pointer, we can pass it into the callee + // directly as an oridinary argument without further specializing + // for the access chain beyond the pointer. + // + auto builder = getBuilder(); + auto newParam = builder->createParam(oldArg->getFullType()); + ioInfo.newParams.add(newParam); + return newParam; + } if (auto globalFunc = as<IRGlobalValueWithCode>(oldArg)) { // As above, the identity of the specialized function is sufficient // to resolve the uses return globalFunc; } - else if (oldArg->getOp() == kIROp_GetElement) + else if (isElementAccessInst(oldArg)) { // This is the case where the argument is // in the form `oldBase[oldIndex]`. @@ -801,7 +899,9 @@ struct FunctionParameterSpecializationContext // of things, and then inserted to a more permanent location later. // builder->setInsertLoc(IRInsertLoc()); - auto newVal = builder->emitElementExtract(oldArg->getFullType(), newBase, newIndex); + IRInst* newOperands[] = {newBase, newIndex}; + auto newVal = + builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands); // Because our new instruction wasn't // actually inserted anywhere, we need to @@ -813,6 +913,30 @@ struct FunctionParameterSpecializationContext return newVal; } + else if (isFieldAccessInst(oldArg)) + { + // This is the case where the argument is + // in the form `oldBase.structKey`. 
+ // + auto oldBase = oldArg->getOperand(0); + auto structKey = oldArg->getOperand(1); + + // We handle this case in a similar way as the `oldBase[oldIndex]` + // case, except that we don't need to introduce a new parameter + // for the index, since the struct key is known at compile-time. + auto newBase = getSpecializedValueForArg(ioInfo, oldBase); + + auto builder = getBuilder(); + + builder->setInsertLoc(IRInsertLoc()); + IRInst* newOperands[] = {newBase, structKey}; + auto newVal = + builder->emitIntrinsicInst(oldArg->getFullType(), oldArg->getOp(), 2, newOperands); + + ioInfo.newBodyInsts.add(newVal); + + return newVal; + } else if (auto oldArgLoad = as<IRLoad>(oldArg)) { auto oldPtr = oldArgLoad->getPtr(); @@ -825,15 +949,30 @@ struct FunctionParameterSpecializationContext return newVal; } + else if (auto castHandleToResource = as<IRCastDescriptorHandleToResource>(oldArg)) + { + // We are accessing a resource from a bindless handle. + // We should create a param for the handle, and load the resource from the param. + auto builder = getBuilder(); + auto oldHandle = castHandleToResource->getOperand(0); + auto newHandle = builder->createParam(oldHandle->getFullType()); + ioInfo.newParams.add(newHandle); + + builder->setInsertLoc(IRInsertLoc()); + IRInst* newOperands[] = {newHandle}; + auto newVal = builder->emitIntrinsicInst( + oldArg->getFullType(), + kIROp_CastDescriptorHandleToResource, + 1, + newOperands); + ioInfo.newBodyInsts.add(newVal); + return newVal; + } else { // If we don't match one of the above cases, - // then `isArgSuitableForSpecialization` is - // letting through cases that this function - // hasn't been updated to handle. - // - SLANG_UNEXPECTED("mising case in 'getSpecializedValueForArg'"); - UNREACHABLE_RETURN(nullptr); + // then we are running into an invalid case. 
+ SLANG_UNEXPECTED("unknown argument form for function call specialization."); } } diff --git a/source/slang/slang-ir-specialize-function-call.h b/source/slang/slang-ir-specialize-function-call.h index bab4ce2f4..afb8c2365 100644 --- a/source/slang/slang-ir-specialize-function-call.h +++ b/source/slang/slang-ir-specialize-function-call.h @@ -7,12 +7,14 @@ struct CodeGenContext; struct IRInst; struct IRModule; struct IRParam; +struct IRCall; + class Module; class FunctionCallSpecializeCondition { public: - virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg) = 0; + virtual bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) = 0; virtual bool isParamSuitableForSpecialization(IRParam* param, IRInst* arg); diff --git a/source/slang/slang-ir-specialize-resources.cpp b/source/slang/slang-ir-specialize-resources.cpp index 871ba2c24..0ac08236f 100644 --- a/source/slang/slang-ir-specialize-resources.cpp +++ b/source/slang/slang-ir-specialize-resources.cpp @@ -20,9 +20,10 @@ struct ResourceParameterSpecializationCondition : FunctionCallSpecializeConditio TargetRequest* targetRequest = nullptr; TargetProgram* targetProgram = nullptr; - bool doesParamWantSpecialization(IRParam* param, IRInst* arg) + bool doesParamWantSpecialization(IRParam* param, IRInst* arg, IRCall* callInst) { SLANG_UNUSED(arg); + SLANG_UNUSED(callInst); // Whether or not a parameter needs specialization is really // a function of its type: diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 8584ea95e..551a72fc7 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -17,6 +17,14 @@ bool isPointerOfType(IRInst* type, IROp opCode) return false; } +bool isUserPointerType(IRInst* type) +{ + auto ptrType = as<IRPtrType>(type); + if (!ptrType) + return false; + return ptrType->getAddressSpace() == AddressSpace::UserPointer; +} + IRType* getVectorElementType(IRType* type) { if (auto vectorType = 
as<IRVectorType>(type)) @@ -792,35 +800,212 @@ IRInst* getRootAddr(IRInst* addr, List<IRInst*>& outAccessChain, List<IRInst*>* return addr; } -// A simple and conservative address aliasing check. -bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2) +IRInst* getRootBufferOrAddr(IRInst* addr) { - if (addr1 == addr2) - return true; + auto rootAddr = getRootAddr(addr); + if (as<IRRWStructuredBufferGetElementPtr>(rootAddr)) + { + auto bufferHandle = rootAddr->getOperand(0); + // Check if the bufferHandle itself is a load from a global parameter. + if (auto load = as<IRLoad>(bufferHandle)) + { + auto newRoot = getRootAddr(load->getPtr()); + if (newRoot->getOp() == kIROp_GlobalParam) + return newRoot; + } + } + return rootAddr; +} + +// The aliasing class of an address. This is used to determine +// if two addresses may alias. +enum class AddressAliasingClass +{ + Unknown, + UserPointer, // A user pointer into global memory + Var, // A thread-local or groupshared var. + ConstantBuffer, // A constant buffer or parameter block. + BoundBuffer, // A bound buffer. + BoundTexture, // A bound texture resource. + DescriptorHandle, // A bindless buffer or resource. 
+}; + +AddressAliasingClass getAliasingClass(IRInst* addr) +{ + if (auto globalParam = as<IRGlobalParam>(addr)) + { + auto type = unwrapArray(globalParam->getDataType()); + if (!type) + return AddressAliasingClass::Unknown; + switch (type->getOp()) + { + case kIROp_TextureType: + return AddressAliasingClass::BoundTexture; + case kIROp_HLSLStructuredBufferType: + case kIROp_HLSLRWStructuredBufferType: + case kIROp_HLSLAppendStructuredBufferType: + case kIROp_HLSLConsumeStructuredBufferType: + case kIROp_HLSLRasterizerOrderedStructuredBufferType: + case kIROp_HLSLByteAddressBufferType: + case kIROp_HLSLRWByteAddressBufferType: + case kIROp_HLSLRasterizerOrderedByteAddressBufferType: + case kIROp_GLSLShaderStorageBufferType: + return AddressAliasingClass::BoundBuffer; + case kIROp_ConstantBufferType: + case kIROp_ParameterBlockType: + return AddressAliasingClass::ConstantBuffer; + case kIROp_PtrType: + if (isUserPointerType(type)) + return AddressAliasingClass::UserPointer; + return AddressAliasingClass::Unknown; + case kIROp_DynamicResourceType: + return AddressAliasingClass::DescriptorHandle; + default: + return AddressAliasingClass::Unknown; + } + } + else if (as<IRVar>(addr)) + return AddressAliasingClass::Var; + else if (as<IRGlobalVar>(addr)) + return AddressAliasingClass::Var; + else if (as<IRRWStructuredBufferGetElementPtr>(addr)) + return AddressAliasingClass::DescriptorHandle; + else if (as<IRCastDescriptorHandleToResource>(addr)) + return AddressAliasingClass::DescriptorHandle; - // Two variables can never alias. - addr1 = getRootAddr(addr1); - addr2 = getRootAddr(addr2); + auto type = addr->getDataType(); + if (isUserPointerType(type)) + return AddressAliasingClass::UserPointer; + return AddressAliasingClass::Unknown; +} - // Global addresses can alias with anything. 
- if (!isChildInstOf(addr1, func)) +bool canAddrClassesAlias(AddressAliasingClass c1, AddressAliasingClass c2) +{ + if (c1 == AddressAliasingClass::Unknown || c2 == AddressAliasingClass::Unknown) return true; - if (!isChildInstOf(addr2, func)) + switch (c1) + { + case AddressAliasingClass::Unknown: return true; + case AddressAliasingClass::UserPointer: + case AddressAliasingClass::Var: + // A users pointer or var can only alias with another + // object that is either a user pointer or var. + // + // Generally, a var should never alias with anything else that isn't a var, + // if we never allow the user to take address of a local var. + // We don't allow taking addresses of a local var on most GPU targets, but + // we currently do expose an internal intrinsic to do so when targeting CPU. + // We should consider disallowing this across the board, or enable more aggresive + // criteria when targeting GPU backends. + // For now we stay conservative and just report true even when addr1 is var and + // addr2 is not rooted from a var. + // + return c2 == AddressAliasingClass::UserPointer || c2 == AddressAliasingClass::Var; + case AddressAliasingClass::BoundBuffer: + case AddressAliasingClass::BoundTexture: + // A bound resource can only alias with another + // object that is a bound resource or descriptor handle + return c2 == c1 || c2 == AddressAliasingClass::DescriptorHandle; + + case AddressAliasingClass::DescriptorHandle: + // Can alias with any other resource. + switch (c2) + { + case AddressAliasingClass::BoundBuffer: + case AddressAliasingClass::BoundTexture: + case AddressAliasingClass::DescriptorHandle: + return true; + default: + return false; + } + case AddressAliasingClass::ConstantBuffer: + // Constant buffer cannot alias with anything. + return false; + } + // For any other unknown case, assume they may alias. + return true; +} + +// Has `var` being used in a way that may allow it to alias with a user pointer? 
+bool canVarAliasWithUserPointer(TargetRequest* target, IRInst* var) +{ + if (target && !isCPUTarget(target)) + { + // We don't allow taking the address of a variable on anything other + // than the CPU target. Therefore a var can never alias with a user + // pointer on these targets. + return false; + } + + SLANG_UNUSED(var); + return true; +} + +// A simple and conservative address aliasing check. +bool canAddressesPotentiallyAlias( + TargetRequest* target, + IRGlobalValueWithCode* func, + IRInst* addr1, + IRInst* addr2) +{ + if (addr1 == addr2) + return true; + + addr1 = getRootBufferOrAddr(addr1); + addr2 = getRootBufferOrAddr(addr2); + + auto addr1Class = getAliasingClass(addr1); + auto addr2Class = getAliasingClass(addr2); - if (addr1->getOp() == kIROp_Var && addr2->getOp() == kIROp_Var && addr1 != addr2) + if (!canAddrClassesAlias(addr1Class, addr2Class)) return false; + if (addr1Class == addr2Class) + { + // For these classes of addresses, the identity of the root + // determines whether or not the addresse can alias. + // Note that we assume two different bound resources can never + // alias, and two different variables can never alias. + switch (addr1Class) + { + case AddressAliasingClass::Var: + case AddressAliasingClass::BoundBuffer: + case AddressAliasingClass::BoundTexture: + case AddressAliasingClass::ConstantBuffer: + if (addr1 != addr2) + return false; + break; + } + } + // A param and a var can never alias. if (addr1->getOp() == kIROp_Param && addr1->getParent() == func->getFirstBlock() && addr2->getOp() == kIROp_Var || addr1->getOp() == kIROp_Var && addr2->getOp() == kIROp_Param && addr2->getParent() == func->getFirstBlock()) return false; + + // If one addr is user pointer and one addr is a var, + // they can never alias, if the user code never took the address of + // the var. 
+ if (addr1Class == AddressAliasingClass::Var && addr2Class == AddressAliasingClass::UserPointer) + { + return canVarAliasWithUserPointer(target, addr1); + } + if (addr2Class == AddressAliasingClass::Var && addr1Class == AddressAliasingClass::UserPointer) + { + return canVarAliasWithUserPointer(target, addr2); + } return true; } +bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2) +{ + return canAddressesPotentiallyAlias(nullptr, func, addr1, addr2); +} + bool isPtrLikeOrHandleType(IRInst* type) { if (!type) @@ -1141,15 +1326,15 @@ bool areCallArgumentsSideEffectFree(IRCall* call, SideEffectAnalysisOptions opti if (isBitSet(options, SideEffectAnalysisOptions::UseDominanceTree)) dom = module->findOrCreateDominatorTree(parentFunc); - // If the pointer argument is a local variable (thus can't alias with other addresses) - // and it is never read from in the function, we can safely treat the call as having - // no side-effect. - // This is a conservative test, but is sufficient to detect the most common case where - // a temporary variable is used as the inout argument and the result stored in the temp - // variable isn't being used elsewhere in the parent func. + // If the pointer argument is a local variable (thus can't alias with other + // addresses) and it is never read from in the function, we can safely treat the + // call as having no side-effect. This is a conservative test, but is sufficient to + // detect the most common case where a temporary variable is used as the inout + // argument and the result stored in the temp variable isn't being used elsewhere in + // the parent func. // - // A more aggresive test can check all other address uses reachable from the call site - // and see if any of them are aliasing with the argument. + // A more aggresive test can check all other address uses reachable from the call + // site and see if any of them are aliasing with the argument. 
for (auto use = arg->firstUse; use; use = use->nextUse) { if (as<IRDecoration>(use->getUser())) @@ -1323,8 +1508,8 @@ bool doesCalleeHaveSideEffect(IRInst* callee) } } - // If the callee has no side effect, check if any of its associated functions have side effect. - // If so, we want to keep the callee around. + // If the callee has no side effect, check if any of its associated functions have side + // effect. If so, we want to keep the callee around. // // Typically, once the relevant pass has completed, the association is removed, // and at that point we can remove the function. @@ -2230,13 +2415,12 @@ void legalizeDefUse(IRGlobalValueWithCode* func) !(as<IRVar>(inst) && loopHeaderBlockMap.containsKey(block))) continue; - // Normally, if the common dominator is not `block`, we can simply move the definition - // to the common dominator. - // An exception is when the common dominator is the target block of a - // loop. - // Another exception is when a var in the loop condition block is accessed both inside - // and outside the loop. It is technically visible, but effects on the 'var' are not - // visible outside the loop, so we'll need to hoist it out of the loop. + // Normally, if the common dominator is not `block`, we can simply move the + // definition to the common dominator. An exception is when the common dominator is + // the target block of a loop. Another exception is when a var in the loop condition + // block is accessed both inside and outside the loop. It is technically visible, + // but effects on the 'var' are not visible outside the loop, so we'll need to hoist + // it out of the loop. // // Note that after normalization, loops are in the form of: // ``` @@ -2377,9 +2561,9 @@ bool canOperationBeSpecConst(IROp op, IRType* resultType, IRInst* const* fixedAr // Returns true for ops that can be declared as an operation under `OpSpecConstantOp`. 
// // Integer arithmetic and comparison operations can be `OpSpecConstantOp` with the `Shader` - // capability, while floating-point arithmetic and comparison operations require the `Kernel` - // capability. We only support `Shader` capability for now, return false when floating-point - // arithmetic/comparison is encountered. + // capability, while floating-point arithmetic and comparison operations require the + // `Kernel` capability. We only support `Shader` capability for now, return false when + // floating-point arithmetic/comparison is encountered. switch (op) { case kIROp_Add: diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index c0410fa3c..b8937d569 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -70,6 +70,8 @@ bool isPointerOfType(IRInst* ptrType, IRInst* elementType); // True if ptrType is a pointer type to a type of opCode bool isPointerOfType(IRInst* ptrType, IROp opCode); +bool isUserPointerType(IRInst* type); + // Builds a dictionary that maps from requirement key to requirement value for `interfaceType`. Dictionary<IRInst*, IRInst*> buildInterfaceRequirementDict(IRInterfaceType* interfaceType); @@ -205,6 +207,12 @@ IRInst* getRootAddr( bool canAddressesPotentiallyAlias(IRGlobalValueWithCode* func, IRInst* addr1, IRInst* addr2); +bool canAddressesPotentiallyAlias( + TargetRequest* target, + IRGlobalValueWithCode* func, + IRInst* addr1, + IRInst* addr2); + String dumpIRToString( IRInst* root, IRDumpOptions options = {IRDumpOptions::Mode::Simplified, IRDumpOptions::Flag::DumpDebugIds}); |
