summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArielG-NV <159081215+ArielG-NV@users.noreply.github.com>2025-08-08 13:07:00 -0700
committerGitHub <noreply@github.com>2025-08-08 20:07:00 +0000
commit719772c01a8ee8afa81cded249d6a51e33e17d8d (patch)
tree2bbee35f93dafb452589466f3535da54a49c1501
parent5d1ba37be64980d80e5106f8664a654d907ebaf4 (diff)
Initial copy elision pass regression fix (#8126)
when we have `GetElementPtr -> load -> GetElement` in our use-chain, the final `GetElement` may use the `load` as a `Index`, not a base. This is a non-issue with `getFieldExtract` since a field is a StructKey. We will still add this check to ensure no bugs down the line. --------- Co-authored-by: Harsh Aggarwal <haaggarwal@nvidia.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: slangbot <ellieh+slangbot@nvidia.com> Co-authored-by: slangbot <186143334+slangbot@users.noreply.github.com>
-rw-r--r--source/slang/slang-ir-redundancy-removal.cpp14
-rw-r--r--tests/bugs/gh-8121.slang30
2 files changed, 44 insertions, 0 deletions
diff --git a/source/slang/slang-ir-redundancy-removal.cpp b/source/slang/slang-ir-redundancy-removal.cpp
index 1feab47dd..c52ab7bca 100644
--- a/source/slang/slang-ir-redundancy-removal.cpp
+++ b/source/slang/slang-ir-redundancy-removal.cpp
@@ -446,6 +446,11 @@ bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
load,
[&](IRGetElement* getElement)
{
+ // Only optimize if the load
+ // is the base
+ if (getElement->getBase() != load)
+ return;
+
IRBuilder builder(getElement);
builder.setInsertBefore(getElement);
auto newGetElementPtr = builder.emitElementAddress(
@@ -476,6 +481,15 @@ bool eliminateRedundantLoadStore(IRGlobalValueWithCode* func)
load,
[&](IRFieldExtract* fieldExtract)
{
+ // Only optimize if the load
+ // is the base; not strictly
+ // needed for field extract,
+ // but it will prevent future
+ // regressions if a field ever
+ // becomes a non-struct-key
+ if (fieldExtract->getBase() != load)
+ return;
+
IRBuilder builder(fieldExtract);
builder.setInsertBefore(fieldExtract);
auto newGetFieldAddress = builder.emitFieldAddress(
diff --git a/tests/bugs/gh-8121.slang b/tests/bugs/gh-8121.slang
new file mode 100644
index 000000000..777e75b9e
--- /dev/null
+++ b/tests/bugs/gh-8121.slang
@@ -0,0 +1,30 @@
+//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHECK):-cuda -compute -shaderobj
+
+//TEST_INPUT:ubuffer(data=[1 1 1 1], stride=4),name=inputBuffer
+RWStructuredBuffer<int[2]> inputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<int> outputBuffer;
+
+void math(int data[2])
+{
+ // This test is to ensure that we do not break our optimization-pass which optimizes the IR of
+ // `getElement(load(getElementPtr(...)), ...)` into `load(getElementPtr(getElementPtr(...), ...))`.
+ //
+ // This test triggers the bug given the IR: `store(outputBuffer[0], getElement(Array<>, load(getElementPtr(data,0))))`.
+ // `Array<int,2>(5,6)` is not created as a local variable intentionally. We do this because we want to test the case
+ // where `Array<>` must be accessed as a value (`getElement`), not a pointer (`getElementPtr`).
+ //
+ // If this code fails, outputBuffer[0] will contain the value `1`. We expect `6` if the bug is fixed.
+ outputBuffer[0] = Array<int,2>(5,6)[data[0]];
+}
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain(int3 id: SV_DispatchThreadID)
+{
+ math(inputBuffer[0]);
+}
+
+//CHECK: 6
+//CHECK-NEXT: 0