diff options
| author | Ellie Hermaszewska <ellieh@nvidia.com> | 2023-08-17 13:41:49 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-17 13:41:49 +0800 |
| commit | a0ee2bf671d61d1e2b561db3966e57ffc802040f (patch) | |
| tree | c82bbfeb75cf5fa8d630322a2a62010705579056 /source/slang | |
| parent | 3e41d698714a3ab6235e9275d5e0687a1c5db9c9 (diff) | |
Add loop inversion pass (#2899)
* Generalize collectInductionValues
* Support affine transformations of loop index as induction variables
* Test for generalized induction value collection
* Neaten inductive variable finding
* Make types more specific
* Add loop inversion pass
* Test output changes after loop inversion
* Store the type of implication success when finding inductive variables
* Test that loop induction finding does not alway succeed
* Support chains of additions and branches of additions in induction variable finding
* Use c++17 for downstream compilers
* Wiggle expected output for cross compile test after loop inversion
* Add loop inversion test
* Simplify IfElse instructions with a single trivial block
* Invert loops with a user inserted break
* Limit loop inversion to loops with a 4 instruction or less comparison block
* regenerate vs projects
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/slang-ir-fuse-satcoop.cpp | 17 | ||||
| -rw-r--r-- | source/slang/slang-ir-insts.h | 2 | ||||
| -rw-r--r-- | source/slang/slang-ir-loop-inversion.cpp | 305 | ||||
| -rw-r--r-- | source/slang/slang-ir-loop-inversion.h | 10 | ||||
| -rw-r--r-- | source/slang/slang-ir-util.h | 16 | ||||
| -rw-r--r-- | source/slang/slang-ir.cpp | 2 | ||||
| -rw-r--r-- | source/slang/slang-lower-to-ir.cpp | 19 |
7 files changed, 353 insertions, 18 deletions
diff --git a/source/slang/slang-ir-fuse-satcoop.cpp b/source/slang/slang-ir-fuse-satcoop.cpp index 3c827ef25..b672f3f7c 100644 --- a/source/slang/slang-ir-fuse-satcoop.cpp +++ b/source/slang/slang-ir-fuse-satcoop.cpp @@ -4,6 +4,7 @@ #include "slang-ir-insts.h" #include "slang-ir-specialize-function-call.h" #include "slang-ir-ssa-simplification.h" +#include "slang-ir-util.h" #include "slang-ir.h" namespace Slang @@ -13,22 +14,6 @@ namespace Slang // Some helpers // -// Run an operation over every block in a module -template<typename F> -static void overAllBlocks(IRModule* module, F f) -{ - for (auto globalInst : module->getGlobalInsts()) - { - if (auto func = as<IRGlobalValueWithCode>(globalInst)) - { - for (auto block : func->getBlocks()) - { - f(block); - } - } - } -} - static bool uses(IRInst* used, IRInst* user) { for(auto use = used->firstUse; use; use = use->nextUse) diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h index 3503ece79..f4489611d 100644 --- a/source/slang/slang-ir-insts.h +++ b/source/slang/slang-ir-insts.h @@ -3751,7 +3751,7 @@ public: IRBlock* trueBlock, IRBlock* afterBlock); - IRInst* emitIfElse( + IRIfElse* emitIfElse( IRInst* val, IRBlock* trueBlock, IRBlock* falseBlock, diff --git a/source/slang/slang-ir-loop-inversion.cpp b/source/slang/slang-ir-loop-inversion.cpp new file mode 100644 index 000000000..5050ce9d6 --- /dev/null +++ b/source/slang/slang-ir-loop-inversion.cpp @@ -0,0 +1,305 @@ +#include "slang-ir-loop-inversion.h" + +#include "slang-ir-clone.h" +#include "slang-ir-dominators.h" +#include "slang-ir-insts.h" +#include "slang-ir-lower-witness-lookup.h" +#include "slang-ir-reachability.h" +#include "slang-ir-ssa-simplification.h" +#include "slang-ir-util.h" +#include "slang-ir.h" + +namespace Slang +{ + +static bool isSameBlockOrTrivialBranch(IRBlock* target, IRBlock* scrutinee) +{ + if(target == scrutinee) + return true; + const auto br = as<IRUnconditionalBranch>(scrutinee->getFirstOrdinaryInst()); + return br && br->getTargetBlock() == target && br->getArgCount() == 0 && !scrutinee->hasMoreThanOneUse(); +}; + +static bool isSmallBlock(IRBlock* c) +{ + // Somewhat arbitrarily, 4 instructions, enough for: + // - Arith + // - Comparison + // - Negation + // - Terminator + Int n = 0; + for([[maybe_unused]] const auto i : c->getOrdinaryInsts()) + if(++n > 4) + return false; + return true; +} + +// Loops are suitable for inversion if: +// - The loop jumps to a conditional branch which has the break block as one of +// its successors (or a trivial break block which we erase) and the other +// successor is empty +// - The conditional block is "small", because we will be duplicating it +static bool isSuitableForInversion(IRLoop* loop) +{ + const auto nextBlock = loop->getTargetBlock(); + const auto breakBlock = loop->getBreakBlock(); + + // The first thing a loop does must be a conditional + const auto branch = as<IRIfElse>(nextBlock->getTerminator()); + if(!branch) + return false; + + if(!isSmallBlock(nextBlock)) + return false; + + const auto t = branch->getTrueBlock(); + const auto f = branch->getFalseBlock(); + const auto a = branch->getAfterBlock(); + + // + // In principle we could perform this simplification in the cfg simplifier, + // however it relies on slightly more context than is simple to insert + // there, namely that the removed trivial branching block is branching to a + // loop break block. + // + + // Do we break on the 'true' side? + if(isSameBlockOrTrivialBranch(breakBlock, t) && f == a) + { + if(t != breakBlock) + { + branch->trueBlock.set(breakBlock); + t->removeAndDeallocate(); + } + return true; + } + + // ... or the false side + if(isSameBlockOrTrivialBranch(breakBlock, f) && t == a) + { + if(f != breakBlock) + { + branch->falseBlock.set(breakBlock); + f->removeAndDeallocate(); + } + return true; + } + + return false; +} + +static IRParam* duplicateToParamWithDecorations(IRBuilder& builder, IRCloneEnv& cloneEnv, IRInst* i) +{ + const auto p = builder.emitParam(i->getFullType()); + for(const auto dec : i->getDecorations()) + cloneDecoration(&cloneEnv, dec, p, builder.getModule()); + return p; +} + +// Given +// s: ...1 loop break=b next=c1 +// c1: if x then goto b else goto d +// d: goto c1 +// b: ...2 +// +// Produce: +// s: ...1 goto c1 +// c1: if x then goto e1 else goto l +// e1: goto b +// l: loop break=b next=d +// d: goto c2: +// c2: if x then goto e2 else goto e3 +// e3: goto d +// e2: goto b +// b: ...2 +// +// s is the Start block +// c1, c2 are the Condition blocks +// e1, e2, e3 are the critical Edge breakers +// l is the Loop entering block +// d is the loop boDy +// b is the Break block +static void invertLoop(IRBuilder& builder, IRLoop* loop) +{ + IRBuilderInsertLocScope builderScope(&builder); + const auto s = as<IRBlock>(loop->getParent()); + auto domTree = computeDominatorTree(s->getParent()); + SLANG_ASSERT(s); + const auto c1 = loop->getTargetBlock(); + const auto c1Terminator = as<IRConditionalBranch>(c1->getTerminator()); + SLANG_ASSERT(c1Terminator); + const auto b = loop->getBreakBlock(); + auto& c1dUse = c1Terminator->getTrueBlock() == b ? c1Terminator->falseBlock : c1Terminator->trueBlock; + auto& c1bUse = c1Terminator->getTrueBlock() == b ? c1Terminator->trueBlock : c1Terminator->falseBlock; + const auto d = as<IRBlock>(c1dUse.get()); + SLANG_ASSERT(d); + + IRCloneEnv cloneEnv; + cloneEnv.squashChildrenMapping = true; + + // Since we are duplicating the loop break condition block (c1) we must + // introduce phi values for anything in it upon which the rest of the + // program (b onwards) uses. Lift the values fron c1 used in b (and + // onwards) to parameters. To avoid a critical edge, pass these via a new + // block, e1. + builder.setInsertInto(b); + List<IRInst*> c1Params; + for(auto i : IRInstList<IRInst>(c1->getFirstInst(), c1->getLastInst())) + { + IRParam* p = nullptr; + traverseUses(i, [&](IRUse* u){ + auto userBlock = u->getUser()->getParent(); + if(domTree->dominates(b, userBlock)) + { + // A new parameter to replace this 'i' + if(!p) + p = duplicateToParamWithDecorations(builder, cloneEnv, i); + u->set(p); + } + }); + if(p) + c1Params.add(i); + } + auto e1 = builder.emitBlock(); + e1->insertAfter(c1); + builder.emitBranch(b, c1Params.getCount(), c1Params.getBuffer()); + c1bUse.set(e1); + // Similarly, we have to replace any existing 'break's to break via e1 + traverseUses(b, [&](IRUse* u){ + auto userBlock = u->getUser()->getParent(); + // Restrict this to just those blocks within this loop + if(userBlock != e1 && domTree->dominates(s, userBlock) && !domTree->dominates(b, userBlock)) + u->set(e1); + }); + // We now have + // s: ...1 loop break=b next=c1 + // c1: if x then goto e1 else goto d + // e1: goto b + // d: goto c1 + // b: ...2 + + // Duplicate c1 into c2, and using the same cloneEnv, duplicate e1 into e2 + builder.setInsertInto(builder.getFunc()); + const auto c2 = as<IRBlock>(cloneInst(&cloneEnv, &builder, c1)); + c2->insertBefore(c1); + const auto e2 = as<IRBlock>(cloneInst(&cloneEnv, &builder, e1)); + e2->insertAfter(c2); + const auto c2Terminator = as<IRConditionalBranch>(c2->getTerminator()); + auto& c2eUse = c2Terminator->getTrueBlock() == e1 ? c2Terminator->trueBlock : c2Terminator->falseBlock; + c2eUse.set(e2); + builder.setInsertAfter(c2Terminator); + const auto newC2Terminator = builder.emitIfElse(c2Terminator->getCondition(), c2Terminator->getTrueBlock(), c2Terminator->getFalseBlock(), b); + c2Terminator->removeAndDeallocate(); + // We now have + // s: ...1 loop break=b next=c1 + // c2: if x then goto e2 else goto d + // e2: goto b + // c1: if x then goto e1 else goto d + // e1: goto b + // d: goto c1 + // b: ...2 + + // move the loop instruction to its own block, l + const auto l = builder.emitBlock(); + l->insertAfter(e2); + loop->insertAtEnd(l); + // We now have + // s: ...1 no-termiator + // c2: if x then goto e2 else goto d + // e2: goto b + // l: loop break=b next=c1 + // c1: if x then goto e1 else goto d + // e1: goto b + // d: goto c1 + // b: ...2 + + // add a new terminator to s. A jump to c2, our outer conditional. retain + // any parameters the loop instruction passed to c1 + builder.setInsertInto(s); + List<IRInst*> as; + for(UInt i = 0; i < loop->getArgCount(); ++i) + as.add(loop->getArg(i)); + builder.emitBranch(c2, as.getCount(), as.getBuffer()); + // We now have + // s: ...1, goto c2 + // c2: if x then goto e2 else goto d + // e2: goto b + // l: loop break=b next=c1 + // c1: if x then goto e1 else goto d + // e1: goto b + // d: goto c1 + // b: ...2 + + // modify c2 to jump to the new loop + auto& c2dUse = newC2Terminator->getTrueBlock() == e2 ? newC2Terminator->falseBlock : newC2Terminator->trueBlock; + c2dUse.set(l); + // We now have + // s: ...1, goto c2 + // c2: if x then goto e2 else goto l + // e2: goto b + // l: loop break=b next=c1 + // c1: if x then goto e1 else goto d + // e1: goto b + // d: goto c1 + // b: ...2 + + // + // Now we can modify the loop to jump to the block after the first + // conditional, d, as we know that it won't break out of the loop on the + // first iteration + // + // Beyond just retargeting the loop instruction, we need to make sure any + // parameters the loop instruction is passing to c1 are instead passed to + // 'd', and because we've added parameters to 'd' we need to forward them + // from c1 also which we will accomplish using a new block, e3, + loop->block.set(d); + loop->breakBlock.set(e1); + SLANG_ASSERT(d->getFirstParam() == nullptr); + c1->insertBefore(b); + e1->insertAfter(c1); + List<IRInst*> ps; + for(const auto p : c1->getParams()) + ps.add(p); + builder.setInsertInto(d); + for(const auto p : ps) + { + const auto q = duplicateToParamWithDecorations(builder, cloneEnv, p); + // Replace all uses, except for those in c1 and e1 + List<IRUse*> uses; + traverseUses(p, [&](IRUse* u){if(u->user->getParent() != c1 && u->user->getParent() != e1) uses.add(u);}); + for(auto u : uses) + u->set(q); + } + const auto e3 = builder.emitBlock(); + e3->insertAfter(c1); + builder.emitBranch(d, ps.getCount(), ps.getBuffer()); + c1dUse.set(e3); + // We now have the desired output + // s: ...1, goto c2 + // c2: if x then goto e2 else goto l + // e2: goto b + // l: loop break=e1 next=d + // d: goto c1 + // c1: if x then goto e1 else goto e3 + // e3: goto d + // e1: goto b + // b: ...2 +} + +bool invertLoops(IRModule* module) +{ + IRBuilder builder(module); + List<IRLoop*> toInvert; + overAllBlocks(module, [&](auto b){ + if(auto loop = as<IRLoop>(b->getTerminator())) + { + if(isSuitableForInversion(loop)) + toInvert.add(loop); + } + }); + for(const auto loop : toInvert) + invertLoop(builder, loop); + return toInvert.getCount() > 0; +} + +} // namespace Slang diff --git a/source/slang/slang-ir-loop-inversion.h b/source/slang/slang-ir-loop-inversion.h new file mode 100644 index 000000000..66c386d57 --- /dev/null +++ b/source/slang/slang-ir-loop-inversion.h @@ -0,0 +1,10 @@ +#pragma once + +namespace Slang +{ + struct IRModule; + struct IRInst; + + bool invertLoops(IRModule* module); +} + diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index a0336e1c2..57a6c7c92 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -224,6 +224,22 @@ bool isOne(IRInst* inst); void initializeScratchData(IRInst* inst); void resetScratchDataBit(IRInst* inst, int bitIndex); +// Run an operation over every block in a module +template<typename F> +static void overAllBlocks(IRModule* module, F f) +{ + for (auto globalInst : module->getGlobalInsts()) + { + if (auto func = as<IRGlobalValueWithCode>(globalInst)) + { + for (auto block : func->getBlocks()) + { + f(block); + } + } + } +} + } #endif diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 421c60d1d..1a499842d 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -5134,7 +5134,7 @@ namespace Slang return inst; } - IRInst* IRBuilder::emitIfElse( + IRIfElse* IRBuilder::emitIfElse( IRInst* val, IRBlock* trueBlock, IRBlock* falseBlock, diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index 74ece5bc9..1ed79fbe3 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -9,6 +9,7 @@ #include "../core/slang-performance-profiler.h" #include "slang-check.h" +#include "slang-ir-loop-inversion.h" #include "slang-ir.h" #include "slang-ir-constexpr.h" #include "slang-ir-dce.h" @@ -9702,6 +9703,24 @@ RefPtr<IRModule> generateIRForTranslationUnit( // performMandatoryEarlyInlining(module); + // Where possible, move loop condition checks to the end of loops, and wrap + // the loop in an 'if(condition)'. + // This makes it so that if sccp can see that the loop will always loop + // at least once it can record this information by removing the outer + // conditional. + // This has advantages: + // - Uninitialized variable usage detection doesn't have to + // worry about a loop never being executed. + // - The loop condition is evaluated one fewer times. + // - Allegedly better performance on pipelined processors: + // https://en.wikipedia.org/wiki/Loop_inversion + // + // And disadvantages + // - If sccp is unable to eliminate the outer 'if' then we end up with + // duplicated code the the conditional value. Users don't tend to put + // huge gobs of code in the conditional expression in loops however. + invertLoops(module); + // Next, attempt to promote local variables to SSA // temporaries and do basic simplifications. // |
