summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSai Praveen Bangaru <31557731+saipraveenb25@users.noreply.github.com>2023-05-07 13:35:27 -0400
committerGitHub <noreply@github.com>2023-05-07 10:35:27 -0700
commit89a1234964a1927c4936a2758f72b7d6c9d0bc73 (patch)
treea5967857c68419795cdf39cd0eb2e8ade29cf763
parent271dc1b98d3887b6297c5407dc67692716687f4d (diff)
Optimize logic around indexed temporary variables (#2873)
-rw-r--r--source/slang/slang-ir-autodiff-primal-hoist.cpp58
-rw-r--r--tests/autodiff/reverse-loop-checkpoint-test.slang95
-rw-r--r--tests/autodiff/reverse-loop-checkpoint-test.slang.expected.txt6
3 files changed, 158 insertions, 1 deletions
diff --git a/source/slang/slang-ir-autodiff-primal-hoist.cpp b/source/slang/slang-ir-autodiff-primal-hoist.cpp
index ab23aeb40..353d56cfa 100644
--- a/source/slang/slang-ir-autodiff-primal-hoist.cpp
+++ b/source/slang/slang-ir-autodiff-primal-hoist.cpp
@@ -36,9 +36,16 @@ static bool isDifferentialBlock(IRBlock* block)
return block->findDecoration<IRDifferentialInstDecoration>();
}
-static IRBlock* getLoopRegionBodyBlock(IRLoop* loop)
+static IRBlock* getLoopConditionBlock(IRLoop* loop)
{
auto condBlock = as<IRBlock>(loop->getTargetBlock());
+ SLANG_ASSERT(as<IRIfElse>(condBlock->getTerminator()));
+ return condBlock;
+}
+
+static IRBlock* getLoopRegionBodyBlock(IRLoop* loop)
+{
+ auto condBlock = getLoopConditionBlock(loop);
// We assume the loop body always sit at the true side of the if-else.
if (auto ifElse = as<IRIfElse>(condBlock->getTerminator()))
{
@@ -183,6 +190,12 @@ static Dictionary<IRBlock*, IRBlock*> createPrimalRecomputeBlocks(
auto bodyRecomputeBlock = createRecomputeBlock(bodyBlock);
bodyRecomputeBlock->insertBefore(diffBodyBlock);
diffBodyBlock->replaceUsesWith(bodyRecomputeBlock);
+
+ // Map the primal condition block directly to the diff
+ // condition block (we won't create a recompute block for this)
+ //
+ recomputeBlockMap[getLoopConditionBlock(loop)] = getLoopConditionBlock(diffLoop);
+
moveParams(bodyRecomputeBlock, diffBodyBlock);
{
// After CFG normalization, the loop body will contain only jumps to the
@@ -1060,6 +1073,37 @@ static int getInstRegionNestLevel(
return (int)result;
}
+// Trim defBlockIndices based on the indices of out of scope uses.
+//
+static List<IndexTrackingInfo> maybeTrimIndices(
+ const List<IndexTrackingInfo>& defBlockIndices,
+ const Dictionary<IRBlock*, List<IndexTrackingInfo>>& indexedBlockInfo,
+ const List<IRUse*>& outOfScopeUses)
+{
+ // For each index in defBlockIndices, keep it only if the index list of at least one
+ // out-of-scope use's block contains it; drop it otherwise. (This is sort of slow...)
+ //
+ List<IndexTrackingInfo> result;
+ for (auto& index : defBlockIndices)
+ {
+ bool found = false;
+ for (auto& use : outOfScopeUses)
+ {
+ auto useInst = use->getUser();
+ auto useBlock = useInst->getParent();
+ auto useBlockIndices = indexedBlockInfo[as<IRBlock>(useBlock)].getValue();
+ if (useBlockIndices.contains(index))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ result.add(index);
+ }
+ return result;
+}
+
/// Legalizes all accesses to primal insts from recompute and diff blocks.
///
@@ -1230,6 +1274,12 @@ RefPtr<HoistedPrimalsInfo> ensurePrimalAvailability(
setInsertAfterOrdinaryInst(&builder, getInstInBlock(storeUse->getUser()));
+ // There is an edge-case optimization we apply here:
+ // if none of the out-of-scope uses are actually within the indexed
+ // region, that means there's no need to allocate a fully indexed var.
+ //
+ defBlockIndices = maybeTrimIndices(defBlockIndices, indexedBlockInfo, outOfScopeUses);
+
IRVar* localVar = storeIndexedValue(
&builder,
varBlock,
@@ -1260,6 +1310,11 @@ RefPtr<HoistedPrimalsInfo> ensurePrimalAvailability(
{
defBlockIndices.removeAt(0);
}
+ else
+ {
+ // For all others, check out of scope uses and trim indices if possible.
+ defBlockIndices = maybeTrimIndices(defBlockIndices, indexedBlockInfo, outOfScopeUses);
+ }
setInsertAfterOrdinaryInst(&builder, instToStore);
auto localVar = storeIndexedValue(&builder, varBlock, instToStore, defBlockIndices);
@@ -1650,6 +1705,7 @@ static bool shouldStoreInst(IRInst* inst)
case kIROp_BitXor:
case kIROp_Lsh:
case kIROp_Rsh:
+ case kIROp_Select:
return false;
case kIROp_GetElement:
diff --git a/tests/autodiff/reverse-loop-checkpoint-test.slang b/tests/autodiff/reverse-loop-checkpoint-test.slang
new file mode 100644
index 000000000..732360013
--- /dev/null
+++ b/tests/autodiff/reverse-loop-checkpoint-test.slang
@@ -0,0 +1,95 @@
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type
+//TEST:SIMPLE(filecheck=CHECK): -target hlsl -profile cs_5_0 -entry computeMain -line-directive-mode none
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0 0], stride=4):out,name=outputBuffer
+RWStructuredBuffer<float> outputBuffer;
+
+typedef DifferentialPair<float> dpfloat;
+typedef float.Differential dfloat;
+
+// Test that compute does not have a context.
+// CHECK-NOT: struct {{[a-zA-Z0-9_]*}}_compute_{{[a-zA-Z0-9_]*}}
+
+[BackwardDifferentiable]
+[PreferRecompute]
+float compute(float x, float y)
+{
+ return x * y;
+}
+
+[BackwardDifferentiable]
+[ForceInline]
+float infinitesimal(float x)
+{
+ return x - detach(x);
+}
+
+// Test that computeLoop compiles to just return 0.
+// CHECK: float3 computeLoop{{[_0-9]*}}(float y{{[_0-9]*}})
+// CHECK-NOT: for{{.*}}
+// CHECK: return (float3)0
+
+// Test that computeLoop's intermediates have no float sitting
+// around (must not cache the outvar from 'compute()')
+// CHECK: struct s_bwd_computeLoop_Intermediates
+// CHECK-NEXT: {
+// CHECK-NOT: {{[A-Za-z0-9_]+}} {{[A-Za-z0-9_]+}}[{{.*}}]
+// CHECK: }
+
+[PreferRecompute]
+[BackwardDifferentiable]
+[ForceInline]
+float3 infinitesimal(float3 x)
+{
+ return x - detach(x);
+}
+
+[BackwardDifferentiable]
+[PreferRecompute]
+float3 computeLoop(float y)
+{
+ float w = 0;
+ float3 w3 = float3(0, 0, 0);
+
+ for (int i = 0; i < 8; i++)
+ {
+ float k = compute(i, y);
+ float g = select(k > 0.0, k, 0.0);
+ w += g;
+ w3 += float3(g) * y;
+ }
+
+ var p = (w3 / float3(w));
+ return infinitesimal(p);
+}
+
+// Since computeLoop is recomputed, test_simple_loop should have nothing to store;
+// therefore we check that there is no intermediate context type generated for test_simple_loop.
+
+// CHECK-NOT: struct {{[a-zA-Z0-9_]*}}test_simple_loop{{[a-zA-Z0-9_]*}}
+[BackwardDifferentiable]
+float test_simple_loop(float y)
+{
+ float3 x = computeLoop(y);
+ return y + x.x;
+}
+
+[numthreads(1, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ {
+ dpfloat dpa = dpfloat(1.0, 0.0);
+
+ __bwd_diff(test_simple_loop)(dpa, 1.0f);
+ outputBuffer[0] = dpa.d; // Expect: 2.0
+ }
+
+ {
+ dpfloat dpa = dpfloat(0.4, 0.0);
+
+ __bwd_diff(test_simple_loop)(dpa, 0.5f);
+ outputBuffer[1] = dpa.d; // Expect: 1.0
+ }
+
+ outputBuffer[2] = computeLoop(1.0).x;
+}
diff --git a/tests/autodiff/reverse-loop-checkpoint-test.slang.expected.txt b/tests/autodiff/reverse-loop-checkpoint-test.slang.expected.txt
new file mode 100644
index 000000000..86aa47f11
--- /dev/null
+++ b/tests/autodiff/reverse-loop-checkpoint-test.slang.expected.txt
@@ -0,0 +1,6 @@
+type: float
+2.000000
+1.000000
+0.000000
+0.000000
+0.000000