diff options
| author | Ellie Hermaszewska <ellieh@nvidia.com> | 2023-11-07 05:21:49 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-06 13:21:49 -0800 |
| commit | da9e0adb5cf2b1bed6075a3afe93cfdcceabe904 (patch) | |
| tree | 7ac721efa3821c1d304a20f0ea3a2ee1e3482f23 /source | |
| parent | 79677b83870577fbad9ce65a731d3ae8a4c553c1 (diff) | |
Correctly pass values from the conditional block to the loop during inversion (#3311)
Co-authored-by: Yong He <yonghe@outlook.com>
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/slang-ir-loop-inversion.cpp | 99 |
1 files changed, 74 insertions, 25 deletions
diff --git a/source/slang/slang-ir-loop-inversion.cpp b/source/slang/slang-ir-loop-inversion.cpp index e2f9b435c..c811bf357 100644 --- a/source/slang/slang-ir-loop-inversion.cpp +++ b/source/slang/slang-ir-loop-inversion.cpp @@ -137,6 +137,8 @@ static IRParam* duplicateToParamWithDecorations(IRBuilder& builder, IRCloneEnv& static void invertLoop(IRBuilder& builder, IRLoop* loop) { IRBuilderInsertLocScope builderScope(&builder); + builder.setInsertInto(loop->getParent()); + const auto s = as<IRBlock>(loop->getParent()); auto domTree = computeDominatorTree(s->getParent()); SLANG_ASSERT(s); @@ -152,34 +154,58 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) IRCloneEnv cloneEnv; cloneEnv.squashChildrenMapping = true; + // We don't expect 'd' to have any parameters, because it used to be the + // target of a conditional branch + SLANG_ASSERT(d->getFirstParam() == nullptr); + // Since we are duplicating the loop break condition block (c1) we must // introduce phi values for anything in it upon which the rest of the - // program (b onwards) uses. Lift the values fron c1 used in b (and - // onwards) to parameters. To avoid a critical edge, pass these via a new - // block, e1. - builder.setInsertInto(b); - List<IRInst*> c1Params; + // program (inside the loop, and b onwards) uses. Lift the values from c1 + // used in b (and onwards) to parameters, the same for those used before b. + // To avoid a critical edge, pass these via a new block, e1. + // For any such values used within the loop we can pass directly to d. + // + // c1PostLoopParams are values form c1 used after the loop + List<IRInst*> c1PostLoopParams; + // c1LoopParams are values from c1 used within the loop itself + List<IRInst*> c1LoopParams; for(auto i : IRInstList<IRInst>(c1->getFirstInst(), c1->getLastInst())) { - IRParam* p = nullptr; + IRParam* postLoopParam = nullptr; + IRParam* loopParam = nullptr; traverseUses(i, [&](IRUse* u){ auto userBlock = u->getUser()->getParent(); if(domTree->dominates(b, userBlock)) { // A new parameter to replace this 'i' - if(!p) - p = duplicateToParamWithDecorations(builder, cloneEnv, i); - u->set(p); + if(!postLoopParam) + { + postLoopParam = duplicateToParamWithDecorations(builder, cloneEnv, i); + b->addParam(postLoopParam); + } + u->set(postLoopParam); + } + else if(userBlock != c1) + { + // A new parameter to replace this 'i' + if(!loopParam) + { + loopParam = duplicateToParamWithDecorations(builder, cloneEnv, i); + d->addParam(loopParam); + } + u->set(loopParam); } }); - if(p) - c1Params.add(i); + if(postLoopParam) + c1PostLoopParams.add(i); + if(loopParam) + c1LoopParams.add(i); } // Create another break block b2 that will act as the new break block for the // loop. The original break block b will become the merge point for the outer condition. // - auto b2 = builder.emitBlock(); + const auto b2 = builder.emitBlock(); b2->insertBefore(b); // Create a copy of the parameters in b. b2 will simply pass these to b. @@ -194,7 +220,7 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) auto e1 = builder.emitBlock(); e1->insertAfter(c1); - builder.emitBranch(b2, c1Params.getCount(), c1Params.getBuffer()); + builder.emitBranch(b2, c1PostLoopParams.getCount(), c1PostLoopParams.getBuffer()); c1bUse.set(e1); c1Terminator->afterBlock.set(d); @@ -204,11 +230,15 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) traverseUses(b, [&](IRUse* u){ auto userBlock = u->getUser()->getParent(); // Restrict this to just those blocks within this loop - if(userBlock != e1 && userBlock != b2 && domTree->dominates(s, userBlock) && !domTree->dominates(b, userBlock)) + if(userBlock != e1 + && userBlock != b2 + && userBlock != s + && domTree->dominates(s, userBlock) + && !domTree->dominates(b, userBlock)) { - auto jumpToB2Block = builder.emitBlock(); + const auto jumpToB2Block = builder.emitBlock(); jumpToB2Block->insertAfter(userBlock); - builder.emitBranch(b2, c1Params.getCount(), c1Params.getBuffer()); + builder.emitBranch(b2, c1PostLoopParams.getCount(), c1PostLoopParams.getBuffer()); u->set(jumpToB2Block); } }); @@ -231,7 +261,12 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) auto& c2eUse = c2Terminator->getTrueBlock() == e1 ? c2Terminator->trueBlock : c2Terminator->falseBlock; c2eUse.set(e2); builder.setInsertAfter(c2Terminator); - const auto newC2Terminator = builder.emitIfElse(c2Terminator->getCondition(), c2Terminator->getTrueBlock(), c2Terminator->getFalseBlock(), b); + const auto newC2Terminator = builder.emitIfElse( + c2Terminator->getCondition(), + c2Terminator->getTrueBlock(), + c2Terminator->getFalseBlock(), + b + ); c2Terminator->removeAndDeallocate(); // The cloned e2 will branch into b2 by default, rewrite it to branch to b, the correct merge point. SLANG_ASSERT(cast<IRUnconditionalBranch>(e2->getTerminator())->getTargetBlock() == b2); @@ -251,6 +286,7 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) const auto l = builder.emitBlock(); l->insertAfter(e2); loop->insertAtEnd(l); + // We now have // s: ...1 no-termiator // c2: if x then goto e2 else goto d (merge at b) @@ -306,22 +342,35 @@ static void invertLoop(IRBuilder& builder, IRLoop* loop) // Beyond just retargeting the loop instruction, we need to make sure any // parameters the loop instruction is passing to c1 are instead passed to // 'd', and because we've added parameters to 'd' we need to forward them - // from c1 also which we will accomplish using a new block, e3, + // from c1 also during the back-edge, which we will accomplish using a new + // block, e3, // - loop->block.set(d); - loop->breakBlock.set(b2); + // Utilize the cloneenv to make sure that when entering the loop we use + // c1's instructions as cloned into c2 + builder.setInsertAfter(loop); + List<IRInst*> loopEntryArgs; + for(const auto p : c1LoopParams) + loopEntryArgs.add(cloneInst(&cloneEnv, &builder, p)); + for(UInt i = 0; i < loop->getArgCount(); ++i) + loopEntryArgs.add(loop->getArg(i)); + const auto newLoop = builder.emitLoop(d, b2, loop->getContinueBlock(), loopEntryArgs.getCount(), loopEntryArgs.getBuffer()); + newLoop->sourceLoc = loop->sourceLoc; + loop->transferDecorationsTo(newLoop); + loop->removeAndDeallocate(); + // TODO: This really upsets a few later passes, why isn't it ok to do given // our "irrelevant continue" condition? // loop->continueBlock.set(loop->getTargetBlock()); - SLANG_ASSERT(d->getFirstParam() == nullptr); c1->insertBefore(b2); e1->insertAfter(c1); - List<IRInst*> ps; - for(const auto p : c1->getParams()) - ps.add(p); builder.setInsertInto(d); - for(const auto p : ps) + // Add the necessary parameters for the loop state to d, first the + // paramters for instructions in the duplicated conditional block, then the + // ones from the loop body. + List<IRInst*> ps = c1LoopParams; + for(const auto p : c1->getParams()) { + ps.add(p); const auto q = duplicateToParamWithDecorations(builder, cloneEnv, p); // Replace all uses, except for those in c1 and e1 List<IRUse*> uses; |
