From 89a1234964a1927c4936a2758f72b7d6c9d0bc73 Mon Sep 17 00:00:00 2001 From: Sai Praveen Bangaru <31557731+saipraveenb25@users.noreply.github.com> Date: Sun, 7 May 2023 13:35:27 -0400 Subject: Optimize logic around indexed temporary variables (#2873) --- source/slang/slang-ir-autodiff-primal-hoist.cpp | 58 ++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'source') diff --git a/source/slang/slang-ir-autodiff-primal-hoist.cpp b/source/slang/slang-ir-autodiff-primal-hoist.cpp index ab23aeb40..353d56cfa 100644 --- a/source/slang/slang-ir-autodiff-primal-hoist.cpp +++ b/source/slang/slang-ir-autodiff-primal-hoist.cpp @@ -36,9 +36,16 @@ static bool isDifferentialBlock(IRBlock* block) return block->findDecoration(); } -static IRBlock* getLoopRegionBodyBlock(IRLoop* loop) +static IRBlock* getLoopConditionBlock(IRLoop* loop) { auto condBlock = as(loop->getTargetBlock()); + SLANG_ASSERT(as(condBlock->getTerminator())); + return condBlock; +} + +static IRBlock* getLoopRegionBodyBlock(IRLoop* loop) +{ + auto condBlock = getLoopConditionBlock(loop); // We assume the loop body always sit at the true side of the if-else. if (auto ifElse = as(condBlock->getTerminator())) { @@ -183,6 +190,12 @@ static Dictionary createPrimalRecomputeBlocks( auto bodyRecomputeBlock = createRecomputeBlock(bodyBlock); bodyRecomputeBlock->insertBefore(diffBodyBlock); diffBodyBlock->replaceUsesWith(bodyRecomputeBlock); + + // Map the primal condition block directly to the diff + // condition block (we won't create a recompute block for this) + // + recomputeBlockMap[getLoopConditionBlock(loop)] = getLoopConditionBlock(diffLoop); + moveParams(bodyRecomputeBlock, diffBodyBlock); { // After CFG normalization, the loop body will contain only jumps to the @@ -1060,6 +1073,37 @@ static int getInstRegionNestLevel( return (int)result; } +// Trim defBlockIndices based on the indices of out of scope uses. 
+// +static List maybeTrimIndices( + const List& defBlockIndices, + const Dictionary>& indexedBlockInfo, + const List& outOfScopeUses) +{ + // Go through uses, lookup the defBlockIndices, and remove any indices if they + // are not present in any of the uses. (This is sort of slow...) + // + List result; + for (auto& index : defBlockIndices) + { + bool found = false; + for (auto& use : outOfScopeUses) + { + auto useInst = use->getUser(); + auto useBlock = useInst->getParent(); + auto useBlockIndices = indexedBlockInfo[as(useBlock)].getValue(); + if (useBlockIndices.contains(index)) + { + found = true; + break; + } + } + if (found) + result.add(index); + } + return result; +} + /// Legalizes all accesses to primal insts from recompute and diff blocks. /// @@ -1230,6 +1274,12 @@ RefPtr ensurePrimalAvailability( setInsertAfterOrdinaryInst(&builder, getInstInBlock(storeUse->getUser())); + // There is an edge-case optimization we apply here, + // If none of the out-of-scope uses are actually within the indexed + // region, that means there's no need to allocate a fully indexed var. + // + defBlockIndices = maybeTrimIndices(defBlockIndices, indexedBlockInfo, outOfScopeUses); + IRVar* localVar = storeIndexedValue( &builder, varBlock, @@ -1260,6 +1310,11 @@ RefPtr ensurePrimalAvailability( { defBlockIndices.removeAt(0); } + else + { + // For all others, check out of scope uses and trim indices if possible. + defBlockIndices = maybeTrimIndices(defBlockIndices, indexedBlockInfo, outOfScopeUses); + } setInsertAfterOrdinaryInst(&builder, instToStore); auto localVar = storeIndexedValue(&builder, varBlock, instToStore, defBlockIndices); @@ -1650,6 +1705,7 @@ static bool shouldStoreInst(IRInst* inst) case kIROp_BitXor: case kIROp_Lsh: case kIROp_Rsh: + case kIROp_Select: return false; case kIROp_GetElement: -- cgit v1.2.3