summaryrefslogtreecommitdiffstats
path: root/source
diff options
context:
space:
mode:
authorJay Kwak <82421531+jkwak-work@users.noreply.github.com>2025-09-23 23:47:30 -0700
committerGitHub <noreply@github.com>2025-09-24 06:47:30 +0000
commit9c2024a7509baae921083d49a56e1321c51f00ec (patch)
tree1f66721d16ee032893418b8ef4f7e23a1714c034 /source
parent979e16a34ef9ff2806476b809e2dcba5a96c40d4 (diff)
Remove unnecessary Load and Store pair (#8433)
This commit removes unnecessary Load and Store pairs in IR. When the IR is like ``` let %1 = var let %2 = load(%ptr) store(%1 %2) ``` This PR will replace all uses of %1 with %ptr. And the load and store instructions will be removed. But I found that there can be cases where %2 might be still used later in other IRs. For these cases, the removal of load instruction relies on DCE. --------- Co-authored-by: slangbot <ellieh+slangbot@nvidia.com>
Diffstat (limited to 'source')
-rw-r--r--source/slang/slang-ir-dce.h1
-rw-r--r--source/slang/slang-ir-redundancy-removal.cpp197
-rw-r--r--source/slang/slang-ir-transform-params-to-constref.cpp17
3 files changed, 202 insertions, 13 deletions
diff --git a/source/slang/slang-ir-dce.h b/source/slang/slang-ir-dce.h
index 869ebc36a..4df491881 100644
--- a/source/slang/slang-ir-dce.h
+++ b/source/slang/slang-ir-dce.h
@@ -35,4 +35,5 @@ bool shouldInstBeLiveIfParentIsLive(IRInst* inst, IRDeadCodeEliminationOptions o
bool isWeakReferenceOperand(IRInst* inst, UInt operandIndex);
bool trimOptimizableTypes(IRModule* module);
+
} // namespace Slang
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index c52ab7bca..94bb6b67c 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -126,6 +126,202 @@ bool removeRedundancy(IRModule* module, bool hoistLoopInvariantInsts)
return changed;
}
+bool isAddressMutable(IRInst* inst)
+{
+ auto rootType = getRootAddr(inst)->getDataType();
+ switch (rootType->getOp())
+ {
+ case kIROp_ParameterBlockType:
+ case kIROp_ConstantBufferType:
+ case kIROp_ConstRefType:
+ return false; // immutable
+
+ // We should consider StructuredBuffer as mutable by default, since the resources may alias.
+ // There could be anotherRWStructuredBuffer pointing to the same memory location as the
+ // structured buffer.
+ case kIROp_StructuredBufferLoad:
+ case kIROp_GetStructuredBufferPtr:
+ return true; // mutable
+ }
+
+ // Similarly, IRPtrTypeBase should also be considered writable always,
+ // because there can be aliasing.
+
+ return true; // mutable
+}
+
+/// Eliminate redundant temporary variable copies in load-store patterns.
+/// This optimization looks for patterns where a value is loaded from memory
+/// and immediately stored to a temporary variable, which is then only used
+/// in read-only contexts. In such cases, the temporary variable and the
+/// load-store indirection can be eliminated by using the original memory
+/// location directly.
+/// Returns true if any changes were made.
+static bool eliminateRedundantTemporaryCopyInFunc(IRFunc* func)
+{
+ // Consider the following IR pattern:
+ // ```
+ // let %temp = var
+ // let %value = load(%sourcePtr)
+ // store(%temp, %value)
+ // ```
+ // We can replace "%temp" with "%sourcePtr" without the load and store indirection
+ // if "%temp" is used only in read-only contexts.
+
+ bool overallChanged = false;
+ for (bool changed = true; changed;)
+ {
+ changed = false;
+
+ HashSet<IRInst*> toRemove;
+
+ for (auto block : func->getBlocks())
+ {
+ for (auto blockInst : block->getChildren())
+ {
+ auto storeInst = as<IRStore>(blockInst);
+ if (!storeInst)
+ {
+ // We are interested only in IRStore.
+ continue;
+ }
+
+ auto storedValue = storeInst->getVal();
+ auto destPtr = storeInst->getPtr();
+
+ if (destPtr->getOp() != kIROp_Var)
+ {
+ // Only optimize temporary variable.
+ // Don't optimize stores to permanent memory locations.
+ continue;
+ }
+
+ // Check if we're storing a load result
+ auto loadInst = as<IRLoad>(storedValue);
+ if (!loadInst)
+ {
+ // Skip because only IRLoad is expected for the optimization.
+ continue;
+ }
+
+ auto loadPtr = loadInst->getPtr();
+
+ if (isAddressMutable(loadPtr))
+ {
+ // If the input is mutable, we cannot optimize,
+ // because any function calls may alter the content of the input
+ // and we cannot replace the temporary copy with a memory pointer.
+ continue;
+ }
+
+ // Storing address-sapce for later use.
+ AddressSpace loadAddressSpace = AddressSpace::Generic;
+ if (auto rootPtrType = as<IRPtrTypeBase>(getRootAddr(loadPtr)->getDataType()))
+ {
+ loadAddressSpace = rootPtrType->getAddressSpace();
+ }
+
+ // Do not optimize loads from semantic parameters because some semantics have
+ // builtin types that are vector types but pretend to be scalar types (e.g.,
+ // SV_DispatchThreadID is used as 'int id' but maps to 'float3
+ // gl_GlobalInvocationID'). The legalization step must remove the load instruction
+ // to maintain this pretense, which breaks our load/store optimization assumptions.
+ // Skip optimization when loading from semantics to let legalization handle the load
+ // removal.
+ if (auto param = as<IRParam>(loadPtr))
+ if (param->findDecoration<IRSemanticDecoration>())
+ continue;
+
+ // Check all uses of the destination variable
+ for (auto use = destPtr->firstUse; use; use = use->nextUse)
+ {
+ auto user = use->getUser();
+ if (user == storeInst)
+ {
+ // Skip the store itself
+ continue; // check the next use
+ }
+
+ if (as<IRStore>(use->getUser()))
+ {
+ // We cannot optimize when the variable is reused
+ // with another store.
+ goto unsafeToOptimize;
+ }
+
+ if (as<IRLoad>(user))
+ {
+ // Allow loads because IRLoad is read-only operation
+ continue; // Check the next use
+ }
+
+ // For function calls, check if the pointer is treated as immutable.
+ if (auto call = as<IRCall>(user))
+ {
+ auto callee = call->getCallee();
+ auto funcInst = as<IRFunc>(callee);
+ if (!funcInst)
+ goto unsafeToOptimize;
+
+ UIndex argIndex = (UIndex)(use - call->getArgs());
+ SLANG_ASSERT(argIndex < call->getArgCount());
+ SLANG_ASSERT(call->getArg(argIndex) == destPtr);
+
+ IRParam* param = funcInst->getFirstParam();
+ for (UIndex i = 0; i < argIndex; i++)
+ {
+ if (param)
+ param = param->getNextParam();
+ }
+ if (nullptr == param)
+ goto unsafeToOptimize; // IRFunc might be incomplete yet
+
+ if (auto paramPtrType = as<IRConstRefType>(param->getFullType()))
+ {
+ if (paramPtrType->getAddressSpace() != loadAddressSpace)
+ goto unsafeToOptimize; // incompatible address space
+
+ continue; // safe so far and check the next use
+ }
+ goto unsafeToOptimize; // must be const-ref
+ }
+
+ // TODO: there might be more cases that is safe to optimize
+ // We need to add more cases here as needed.
+
+ // If we get here, the pointer is used with an unexpected IR.
+ goto unsafeToOptimize;
+ }
+
+ // If we get here, all uses are safe to optimize.
+
+ // Replace all uses of destPtr with loadedPtr
+ destPtr->replaceUsesWith(loadPtr);
+
+ // Mark instructions for removal
+ toRemove.add(storeInst);
+ toRemove.add(destPtr);
+
+ // Note: loadInst might be still in use.
+ // We need to rely on DCE to delete it if unused.
+
+ changed = true;
+ overallChanged = true;
+
+ unsafeToOptimize:;
+ }
+ }
+
+ // Remove marked instructions
+ for (auto instToRemove : toRemove)
+ {
+ instToRemove->removeAndDeallocate();
+ }
+ }
+
+ return overallChanged;
+}
+
bool removeRedundancyInFunc(IRGlobalValueWithCode* func, bool hoistLoopInvariantInsts)
{
auto root = func->getFirstBlock();
@@ -163,6 +359,7 @@ bool removeRedundancyInFunc(IRGlobalValueWithCode* func, bool hoistLoopInvariant
if (auto normalFunc = as<IRFunc>(func))
{
result |= eliminateRedundantLoadStore(normalFunc);
+ result |= eliminateRedundantTemporaryCopyInFunc(normalFunc);
}
return result;
}
diff --git a/source/slang/slang-ir-transform-params-to-constref.cpp b/source/slang/slang-ir-transform-params-to-constref.cpp
index 8f4bcd037..9328a1de1 100644
--- a/source/slang/slang-ir-transform-params-to-constref.cpp
+++ b/source/slang/slang-ir-transform-params-to-constref.cpp
@@ -60,7 +60,7 @@ struct TransformParamsToConstRefContext
case kIROp_FieldExtract:
{
// Transform the IRFieldExtract into a IRFieldAddress
- auto fieldExtract = as<IRFieldExtract>(use->getUser());
+ auto fieldExtract = as<IRFieldExtract>(user);
builder.setInsertBefore(fieldExtract);
auto fieldAddr = builder.emitFieldAddress(
fieldExtract->getBase(),
@@ -73,8 +73,7 @@ struct TransformParamsToConstRefContext
case kIROp_GetElement:
{
// Transform the IRGetElement into a IRGetElementPtr
- auto getElement = as<IRGetElement>(use->getUser());
-
+ auto getElement = as<IRGetElement>(user);
builder.setInsertBefore(getElement);
auto elemAddr = builder.emitElementAddress(
getElement->getBase(),
@@ -111,14 +110,8 @@ struct TransformParamsToConstRefContext
List<IRInst*> newArgs;
// Transform arguments to match the updated-parameter
- IRParam* param = func->getFirstParam();
UInt i = 0;
- auto iterate = [&]()
- {
- param = param->getNextParam();
- i++;
- };
- for (; param; iterate())
+ for (IRParam* param = func->getFirstParam(); param; param = param->getNextParam(), i++)
{
auto arg = call->getArg(i);
if (!updatedParams.contains(param))
@@ -183,7 +176,6 @@ struct TransformParamsToConstRefContext
void processFunc(IRFunc* func)
{
HashSet<IRParam*> updatedParams;
- bool hasTransformedParams = false;
// First pass: Transform parameter types
for (auto param = func->getFirstParam(); param; param = param->getNextParam())
@@ -203,13 +195,12 @@ struct TransformParamsToConstRefContext
auto constRefType = builder.getConstRefType(paramType, AddressSpace::ThreadLocal);
param->setFullType(constRefType);
- hasTransformedParams = true;
changed = true;
updatedParams.add(param);
}
}
- if (!hasTransformedParams)
+ if (updatedParams.getCount() == 0)
{
return;
}