summaryrefslogtreecommitdiff
path: root/source/slang/slang-ir-loop-unroll.cpp
diff options
context:
space:
mode:
author Ellie Hermaszewska <ellieh@nvidia.com> 2024-10-29 14:49:26 +0800
committer GitHub <noreply@github.com> 2024-10-29 14:49:26 +0800
commit f65d756bff8d4c5cbc15bd0322a2ae8e6b896a21 (patch)
tree ea1d61342cd29368e19135000ec2948813096205 /source/slang/slang-ir-loop-unroll.cpp
parent a729c15e9dce9f5116a38afc66329ab2ca4cea54 (diff)
format
* format * Minor test fixes * enable checking cpp format in ci
Diffstat (limited to 'source/slang/slang-ir-loop-unroll.cpp')
-rw-r--r-- source/slang/slang-ir-loop-unroll.cpp 82
1 files changed, 49 insertions, 33 deletions
diff --git a/source/slang/slang-ir-loop-unroll.cpp b/source/slang/slang-ir-loop-unroll.cpp
index ef0551161..5f9c53bc7 100644
--- a/source/slang/slang-ir-loop-unroll.cpp
+++ b/source/slang/slang-ir-loop-unroll.cpp
@@ -1,13 +1,14 @@
#include "slang-ir-loop-unroll.h"
-#include "slang-ir.h"
+
+#include "../core/slang-performance-profiler.h"
+#include "slang-ir-clone.h"
+#include "slang-ir-dce.h"
+#include "slang-ir-dominators.h"
#include "slang-ir-insts.h"
#include "slang-ir-peephole.h"
-#include "slang-ir-dominators.h"
-#include "slang-ir-clone.h"
-#include "slang-ir-util.h"
#include "slang-ir-simplify-cfg.h"
-#include "slang-ir-dce.h"
-#include "../core/slang-performance-profiler.h"
+#include "slang-ir-util.h"
+#include "slang-ir.h"
namespace Slang
{
@@ -60,8 +61,7 @@ static int _getLoopMaxIterationsToUnroll(IRLoop* loopInst)
auto maxIterCount = as<IRIntLit>(forceUnrollDecor->getOperand(0));
if (maxIterCount && maxIterCount->getValue() != 0)
{
- maxIterations =
- Math::Min((int)maxIterCount->getValue() + 1, kMaxIterationsToAttempt);
+ maxIterations = Math::Min((int)maxIterCount->getValue() + 1, kMaxIterationsToAttempt);
}
return maxIterations;
}
@@ -100,7 +100,9 @@ static void _foldAndSimplifyLoopIteration(
{
if (auto constCondition = as<IRConstant>(cbranch->getCondition()))
{
- auto targetBlock = (constCondition->value.intVal != 0) ? cbranch->getTrueBlock() : cbranch->getFalseBlock();
+ auto targetBlock = (constCondition->value.intVal != 0)
+ ? cbranch->getTrueBlock()
+ : cbranch->getFalseBlock();
builder.setInsertBefore(cbranch);
builder.emitBranch(targetBlock);
cbranch->removeAndDeallocate();
@@ -146,7 +148,8 @@ static void _foldAndSimplifyLoopIteration(
// Unroll loop up to a predefined maximum number of iterations.
// Returns true if we can statically determine that the loop terminated within the iteration limit.
-// This operation assumes the loop does not have `continue` jumps, i.e. continueBlock == targetBlock.
+// This operation assumes the loop does not have `continue` jumps, i.e. continueBlock ==
+// targetBlock.
static bool _unrollLoop(
TargetProgram* targetProgram,
IRModule* module,
@@ -173,13 +176,14 @@ static bool _unrollLoop(
// Insert an outer breakable region so we have a break label to use as the target for
// any `break` jumps in the unrolled loop.
// Transform CFG from [..., loopInst] -> [loopTarget] ->... [originalLoopBreakBlock]
- // Into: [..., loop] -> [outerBreakableRegionHeader, loopInst(phi_arg)] -> [(phi_param) loopTarget] -> ... ->
+ // Into: [..., loop] -> [outerBreakableRegionHeader, loopInst(phi_arg)] -> [(phi_param)
+ // loopTarget] -> ... ->
// [newLoopBreakBlock] -> [originalLoopBreakBlock/outerBreakableRegionBreakBlock]
- // After this transform, the original break block of the loop will serve as the break block for the
- // outer breakable region.
+ // After this transform, the original break block of the loop will serve as the break block for
+ // the outer breakable region.
IRBuilder builder(module);
-
+
auto unreachableBlock = builder.createBlock();
builder.setInsertInto(unreachableBlock);
builder.emitUnreachable();
@@ -187,7 +191,7 @@ static bool _unrollLoop(
auto outerBreakableRegionHeader = builder.createBlock();
outerBreakableRegionHeader->insertBefore(loopInst->getTargetBlock());
-
+
auto newLoopBreakableRegionBreakBlock = builder.createBlock();
newLoopBreakableRegionBreakBlock->insertBefore(loopInst->getBreakBlock());
@@ -265,15 +269,20 @@ static bool _unrollLoop(
cloneEnv.mapOldValToNew[loopInst->getTargetBlock()] = firstIterationBreakBlock;
// Wire up the breakable region blocks.
- // Note that the breakable region header will never have any phi params because there will never
- // be back jumps into the header (it is a single iteration loop just for the break label).
+ // Note that the breakable region header will never have any phi params because there will
+ // never be back jumps into the header (it is a single iteration loop just for the break
+ // label).
builder.setInsertBefore(loopInst);
- builder.emitLoop(firstIterationLoopHeader, firstIterationBreakBlock, firstIterationLoopHeader);
+ builder.emitLoop(
+ firstIterationLoopHeader,
+ firstIterationBreakBlock,
+ firstIterationLoopHeader);
- // The `firstIterationBreakBlock` is supposed to act as the `targetBlock` for the back-jump in the
- // loop body. Therefore, if the original loop target block has any phi params, we will need the
- // same set of phi params in `firstIterationBreakBlock` so keep those branches valid.
+ // The `firstIterationBreakBlock` is supposed to act as the `targetBlock` for the back-jump
+ // in the loop body. Therefore, if the original loop target block has any phi params, we
+ // will need the same set of phi params in `firstIterationBreakBlock` so keep those branches
+ // valid.
builder.setInsertInto(firstIterationBreakBlock);
{
@@ -294,13 +303,14 @@ static bool _unrollLoop(
newParams.getBuffer()));
loopInst->removeAndDeallocate();
- // Update `loopInst` to represent the remaining loop iterations that are yet to be unrolled.
+ // Update `loopInst` to represent the remaining loop iterations that are yet to be
+ // unrolled.
loopInst = newLoopInst;
}
- // With the break region set up and wired, we can now clone the loop body into the break region.
- // We create all the blocks first, and setup the clone mapping for the blocks so when we
- // clone the insts later, the branch targets will automatically set to their clones.
+ // With the break region set up and wired, we can now clone the loop body into the break
+ // region. We create all the blocks first, and setup the clone mapping for the blocks so
+ // when we clone the insts later, the branch targets will automatically set to their clones.
List<IRBlock*> clonedBlocks;
for (auto b : blocks)
@@ -341,7 +351,11 @@ static bool _unrollLoop(
// conditional jumps can be folded into unconditional jumps.
_foldAndSimplifyLoopIteration(
- targetProgram, builder, clonedBlocks, firstIterationBreakBlock, unreachableBlock);
+ targetProgram,
+ builder,
+ clonedBlocks,
+ firstIterationBreakBlock,
+ unreachableBlock);
// Now we have peeled off one iteration from the loop, we check if there are any
// branches into next iteration, if not, the loop terminates and we are done.
@@ -441,7 +455,8 @@ bool unrollLoopsInFunc(
DiagnosticSink* sink)
{
List<IRLoop*> loops = collectLoopsInFunc(
- func, [](IRLoop* l) { return l->findDecoration<IRForceUnrollDecoration>() != nullptr; });
+ func,
+ [](IRLoop* l) { return l->findDecoration<IRForceUnrollDecoration>() != nullptr; });
if (loops.getCount() == 0)
return true;
@@ -476,7 +491,7 @@ bool unrollLoopsInModule(TargetProgram* target, IRModule* module, DiagnosticSink
{
if (as<IRGeneric>(inst))
continue;
-
+
if (auto func = as<IRGlobalValueWithCode>(inst))
{
bool result = unrollLoopsInFunc(target, module, func, sink);
@@ -527,7 +542,7 @@ void eliminateContinueBlocks(IRModule* module, IRLoop* loopInst)
IRBuilder builder(module);
IRBuilderSourceLocRAII sourceLocationScope(&builder, loopInst->sourceLoc);
-
+
auto targetBlock = loopInst->getTargetBlock();
auto innerBreakableRegionHeader = builder.createBlock();
@@ -548,14 +563,14 @@ void eliminateContinueBlocks(IRModule* module, IRLoop* loopInst)
builder.emitLoop(targetBlock, innerBreakableRegionBreakBlock, targetBlock);
continueBlock->replaceUsesWith(innerBreakableRegionBreakBlock);
-
+
builder.setInsertInto(innerBreakableRegionBreakBlock);
moveParams(innerBreakableRegionBreakBlock, continueBlock);
builder.emitBranch(continueBlock);
// If the original loop can be executed up to N times, the new loop may be executed
// up to N+1 times (although most insts are skipped in the last traversal)
- //
+ //
if (auto maxItersDecoration = loopInst->findDecoration<IRLoopMaxItersDecoration>())
{
auto maxIters = maxItersDecoration->getMaxIters();
@@ -567,7 +582,8 @@ void eliminateContinueBlocks(IRModule* module, IRLoop* loopInst)
void eliminateContinueBlocksInFunc(IRModule* module, IRGlobalValueWithCode* func)
{
List<IRLoop*> loops = collectLoopsInFunc(
- func, [](IRLoop* l) { return l->getContinueBlock() != l->getTargetBlock(); });
+ func,
+ [](IRLoop* l) { return l->getContinueBlock() != l->getTargetBlock(); });
if (loops.getCount() == 0)
return;
@@ -578,4 +594,4 @@ void eliminateContinueBlocksInFunc(IRModule* module, IRGlobalValueWithCode* func
}
}
-}
+} // namespace Slang