diff options
| author | Sai Praveen Bangaru <31557731+saipraveenb25@users.noreply.github.com> | 2023-06-07 00:16:19 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-07 00:16:19 -0400 |
| commit | 7561ffb3e7d4d55f1a4cf786319e7a07f204a9e3 (patch) | |
| tree | 579bb546e882558c556495293e85ced59d8f002f | |
| parent | 52f27e4c35a42f5b6796a8f1ef5c51937f175d82 (diff) | |
AD: Fix out-of-scope indexing rules for insts in loop header blocks during the primal-inst availability pass (#2918)
* add test case
* Fix out-of-scope indexing rules for loop header blocks
---------
Co-authored-by: Yong He <yhe@nvidia.com>
Co-authored-by: Yong He <yonghe@outlook.com>
4 files changed, 96 insertions, 7 deletions
diff --git a/source/slang/slang-ir-autodiff-primal-hoist.cpp b/source/slang/slang-ir-autodiff-primal-hoist.cpp index 0016f25e3..6947fd7c5 100644 --- a/source/slang/slang-ir-autodiff-primal-hoist.cpp +++ b/source/slang/slang-ir-autodiff-primal-hoist.cpp @@ -961,6 +961,7 @@ IRInst* emitIndexedStoreAddressForVar( IRInst* emitIndexedLoadAddressForVar( IRBuilder* builder, IRVar* localVar, + IRBlock* defBlock, const List<IndexTrackingInfo>& defBlockIndices, const List<IndexTrackingInfo>& useBlockIndices) { @@ -986,12 +987,15 @@ IRInst* emitIndexedLoadAddressForVar( { // If the use-block is outside this region, use the // last available value (by indexing with primal counter minus 1) + // An exception is if the stored inst is in a loop header block where + // we use counter directly (since that block runs N+1 times) // auto primalCounterCurrValue = index.primalCountParam; - auto primalCounterLastValue = builder->emitSub( - primalCounterCurrValue->getDataType(), - primalCounterCurrValue, - builder->getIntValue(builder->getIntType(), 1)); + auto primalCounterLastValue = (index.loopHeaderBlock == defBlock) ? primalCounterCurrValue : + builder->emitSub( + primalCounterCurrValue->getDataType(), + primalCounterCurrValue, + builder->getIntValue(builder->getIntType(), 1)); loadAddr = builder->emitElementAddress( builder->getPtrType(currType), @@ -1021,10 +1025,11 @@ IRVar* storeIndexedValue( IRInst* loadIndexedValue( IRBuilder* builder, IRVar* localVar, + IRBlock* defBlock, const List<IndexTrackingInfo>& defBlockIndices, const List<IndexTrackingInfo>& useBlockIndices) { - IRInst* addr = emitIndexedLoadAddressForVar(builder, localVar, defBlockIndices, useBlockIndices); + IRInst* addr = emitIndexedLoadAddressForVar(builder, localVar, defBlock, defBlockIndices, useBlockIndices); return builder->emitLoad(addr); } @@ -1292,7 +1297,12 @@ RefPtr<HoistedPrimalsInfo> ensurePrimalAvailability( List<IndexTrackingInfo>& useBlockIndices = indexedBlockInfo[getBlock(use->getUser())]; - IRInst* loadAddr = emitIndexedLoadAddressForVar(&builder, localVar, defBlockIndices, useBlockIndices); + IRInst* loadAddr = emitIndexedLoadAddressForVar( + &builder, + localVar, + defBlock, + defBlockIndices, + useBlockIndices); builder.replaceOperand(use, loadAddr); } @@ -1323,7 +1333,9 @@ RefPtr<HoistedPrimalsInfo> ensurePrimalAvailability( { List<IndexTrackingInfo> useBlockIndices = indexedBlockInfo[getBlock(use->getUser())]; setInsertBeforeOrdinaryInst(&builder, getInstInBlock(use->getUser())); - builder.replaceOperand(use, loadIndexedValue(&builder, localVar, defBlockIndices, useBlockIndices)); + builder.replaceOperand( + use, + loadIndexedValue(&builder, localVar, defBlock, defBlockIndices, useBlockIndices)); } if (!isRecomputeInst) @@ -1512,6 +1524,8 @@ void buildIndexedBlocks( IndexTrackingInfo indexInfo = {}; lowerIndexedRegion(primalLoop, loop, indexInfo.primalCountParam, indexInfo.diffCountParam); + indexInfo.loopHeaderBlock = getLoopConditionBlock(primalLoop); + SLANG_RELEASE_ASSERT(indexInfo.primalCountParam); SLANG_RELEASE_ASSERT(indexInfo.diffCountParam); diff --git a/source/slang/slang-ir-autodiff-primal-hoist.h b/source/slang/slang-ir-autodiff-primal-hoist.h index c9377d56b..59c70e862 100644 --- a/source/slang/slang-ir-autodiff-primal-hoist.h +++ b/source/slang/slang-ir-autodiff-primal-hoist.h @@ -211,6 +211,12 @@ namespace Slang IRInst* primalCountParam = nullptr; IRInst* diffCountParam = nullptr; + // Reference to the header block. Note that the header block + // typically contains the loop condition and is executed N+1 + // times if the loop body is executed N times. + // + IRBlock* loopHeaderBlock = nullptr; + enum CountStatus { Unresolved, diff --git a/tests/autodiff/high-order-backward-diff-4.slang b/tests/autodiff/high-order-backward-diff-4.slang new file mode 100644 index 000000000..9ee9aa4c4 --- /dev/null +++ b/tests/autodiff/high-order-backward-diff-4.slang @@ -0,0 +1,64 @@ +//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -output-using-type +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer<float> outputBuffer; + +struct A : IDifferentiable +{ + float x; + int nx; +} + +[BackwardDifferentiable] +float f(A x) +{ + A rs; + rs.x = 1.0; + for (int i = 0; i < 2; i++) + rs.x = rs.x * x.x; + return rs.x; +} + +[BackwardDifferentiable] +float outerF(A x) +{ + A nx; + nx.x = 1.0; + int i = 0; + [MaxIters(3)] + do + { + nx.x = nx.x * x.x; + i++; + } + while (i < 2); + nx.nx = 2;//x.nx; + return f(nx); +} + +[BackwardDifferentiable] +float df(A x) +{ + A.Differential ad; + ad.x = 0.0; + var p = diffPair(x, ad); + __bwd_diff(outerF)(p, 1.0); + return p.d.x; +} + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + // Given f(x) = x^4, + // f''(x) = 12 * x^2 + // Expect f''(4) = 192 + A a; + a.x = 4.0; + a.nx = 54; + A.Differential ad; + ad.x = 1.0; + var p = diffPair(a, ad); + outputBuffer[0] = __fwd_diff(df)(p).d; +} diff --git a/tests/autodiff/high-order-backward-diff-4.slang.expected.txt b/tests/autodiff/high-order-backward-diff-4.slang.expected.txt new file mode 100644 index 000000000..0f08247f0 --- /dev/null +++ b/tests/autodiff/high-order-backward-diff-4.slang.expected.txt @@ -0,0 +1,5 @@ +type: float +192.000000 +0.000000 +0.000000 +0.000000 |
