summaryrefslogtreecommitdiffstats
path: root/source/slang
diff options
context:
space:
mode:
Diffstat (limited to 'source/slang')
-rw-r--r--source/slang/slang-emit-c-like.cpp276
-rw-r--r--source/slang/slang-emit-c-like.h9
-rw-r--r--source/slang/slang-emit-cpp.cpp4
-rw-r--r--source/slang/slang-emit.cpp68
-rw-r--r--source/slang/slang-ir-eliminate-phis.cpp894
-rw-r--r--source/slang/slang-ir-eliminate-phis.h16
-rw-r--r--source/slang/slang-ir-restructure-scoping.cpp109
7 files changed, 1036 insertions, 340 deletions
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 06e1e782b..6b8939d85 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2081,13 +2081,8 @@ void CLikeSourceEmitter::_emitInst(IRInst* inst)
case kIROp_Var:
{
- auto ptrType = cast<IRPtrType>(inst->getDataType());
- auto valType = ptrType->getValueType();
-
- auto name = getName(inst);
- emitRateQualifiers(inst);
- emitType(valType, name);
- m_writer->emit(";\n");
+ auto var = cast<IRVar>(inst);
+ emitVar(var);
}
break;
@@ -2222,212 +2217,6 @@ void CLikeSourceEmitter::emitLayoutSemantics(IRInst* inst, char const* uniformSe
emitLayoutSemanticsImpl(inst, uniformSemanticSpelling);
}
-void CLikeSourceEmitter::emitPhiVarAssignments(UInt argCount, IRUse* args, IRBlock* targetBlock)
-{
- // The basic setup here is that we are at the end of a block
- // and are about to branch to `targetBlock`. The target block
- // has parameters (representing the phi variables/nodes in SSA
- // form), and the block we are branching from provided arguments
- // (given as `args` and `argCount` here) to be passed to those
- // parameters.
- //
- // An earlier step will have already emitted a local variable
- // declaration for each phi node (block parameter), so we should
- // be able to "pass" an argument to a parameter by assigning
- // to the variable that represents the parameter.
- //
- // A naive approach would simply loop over the parameters/arguments
- // in tandem and emit assignments like:
- //
- // <param_i> = <arg_i>;
- //
- // This approach has an important and known failure case, which
- // can occur when one of the arguments is also one of the parameters.
- //
- // If we have a block like:
- //
- // block b(
- // param x: Int,
- // y: Int)
- // ...
- //
- // and then a branch to it like:
- //
- // br(b, y, x);
- //
- // Then the naive approach for emitting the assignments for
- // that branch would output:
- //
- // x = y;
- // y = x;
- //
- // But that is not the semantics that the "parameter passing"
- // model is meant to capture.
- //
- // The simplest way to restore the correct semantics would
- // be to assign each of the arguments to a temporary, and
- // then to assign those temporaries to the parameters:
- //
- // let tmp0 = y;
- // let tmp1 = x;
- // x = tmp0;
- // y = tmp1;
- //
- // Adding temporaries like that unconditionally would clutter
- // up the generated code, so it would be nicer to only
- // introduce them when strictly necessary.
- //
- // A temporary should only be necessary when we have:
- //
- // * A parameter `destParam` of the `targetBlock`
- // * Where the argument value for `destParam` is another parameter, `srcParam`, of `targetBlock`
- // * And `srcParam` appears before `destParam` in the parameter list
- //
- // We start by looking for such cases, and collecting the
- // set of block parameters that will require a defensive
- // temporary.
- //
- // Note: an alternative approach would first look for a total
- // order on the arguments/parameters such that we avoid
- // the problem, and then only resort to introducing temporaries
- // as a means of breaking cycles.
- //
- // We will track the parameters that need temporaries by
- // building a dictionary mapping parameters to the name
- // of the temporary they will use, if any.
- //
- Dictionary<IRParam*, String> mapParamToTempName;
- {
- // To know whether a parameter occurs earlier in
- // the list than another, we will rely on a set
- // of parameters we have already "seen" during
- // our iteration.
- //
- // TODO: This is a strong candidate for using a
- // hash set optimized for a small number of entries
- // (and thus using a stack allocation in the common case),
- // given that most blocks will only have a handful
- // of parameters.
- //
- HashSet<IRParam*> seenParams;
-
- UInt argCounter = 0;
- for (auto destParam = targetBlock->getFirstParam(); destParam; destParam = destParam->getNextParam())
- {
- seenParams.Add(destParam);
-
- UInt argIndex = argCounter++;
- if (argIndex >= argCount)
- {
- SLANG_UNEXPECTED("not enough arguments for branch");
- break;
- }
- IRInst* arg = args[argIndex].get();
-
- // Is the argument also a parameter of the same block?
- //
- // If not, then we don't have to worry about this assignment.
- //
- IRParam* srcParam = as<IRParam>(arg);
- if(!srcParam)
- continue;
- if(srcParam->getParent() != targetBlock)
- continue;
-
- // Is the argument an *earlier* parameter of the same block?
- // We test this by checking if we've already seen it during
- // our iteration.
- //
- // If the parameter isn't an earlier one, then we don't have
- // to worry about the order of assignment creating problems.
- //
- if(!seenParams.Contains(srcParam))
- continue;
-
- // At this point we've detected a problem case: the `dstParam`
- // would naively be assigned the value of `srcParam` *after*
- // we've already assigned to `srcParam`.
- //
- // We will need to avoid the problem by introducing a temporary
- // for `srcParam`.
- //
- if( !mapParamToTempName.ContainsKey(srcParam) )
- {
- String tempName = "tmp_" + getName(srcParam);
- mapParamToTempName.Add(srcParam, tempName);
- }
- }
- }
-
- // Now that we've determined which of the parameters could cause problems,
- // we can do an initial round of assignments, where we move form the
- // argument values into either a parameter (if there were no problems)
- // or a temporary (if we needed to avoid a problem).
- //
- {
- UInt argCounter = 0;
- for (auto param = targetBlock->getFirstParam(); param; param = param->getNextParam())
- {
- UInt argIndex = argCounter++;
- if (argIndex >= argCount)
- {
- SLANG_UNEXPECTED("not enough arguments for branch");
- break;
- }
- IRInst* arg = args[argIndex].get();
-
- auto outerPrec = getInfo(EmitOp::General);
- auto prec = getInfo(EmitOp::Assign);
-
- String tempName;
- if( mapParamToTempName.TryGetValue(param, tempName) )
- {
- // This parameter was involved in a confounding assignment,
- // where it was used as the argument value for a later
- // parameter on this same branch/edge.
- //
- // We need to assign to a temporary instead of
- // to the parameter itself. We will declare
- // the temporary here so that the assignment
- // of the argument serves as the initializer.
- //
- emitType(param->getFullType(), tempName);
- }
- else
- {
- // This parameter was not involved in any confounding assignments,
- // so we can simply us the parameter itself as the left-hand side
- // of an assignment.
- //
- emitOperand(param, leftSide(outerPrec, prec));
- }
-
- m_writer->emit(" = ");
- emitOperand(arg, rightSide(prec, outerPrec));
- m_writer->emit(";\n");
- }
- }
-
- // Finally, after all the assignments from argument values have been completed,
- // we will make a cleanup pass that copies from any of the temporary variables
- // we introduced over to the parameter that needed a temporary.
- //
- for (auto param = targetBlock->getFirstParam(); param; param = param->getNextParam())
- {
- String tempName;
- if( !mapParamToTempName.TryGetValue(param, tempName) )
- continue;
-
- auto outerPrec = getInfo(EmitOp::General);
- auto prec = getInfo(EmitOp::Assign);
-
- emitOperand(param, leftSide(outerPrec, prec));
- m_writer->emit(" = ");
- m_writer->emit(tempName);
- m_writer->emit(";\n");
- }
-}
-
void CLikeSourceEmitter::emitRegion(Region* inRegion)
{
// We will use a loop so that we can process sequential (simple)
@@ -2478,43 +2267,6 @@ void CLikeSourceEmitter::emitRegion(Region* inRegion)
// separate `Region`s for them.
emitInst(terminator);
break;
-
- // We will also handle any unconditional branches
- // here, since they may have arguments to pass
- // to the target block (our encoding of SSA
- // "phi" operations).
- //
- // TODO: A better approach would be to move out of SSA
- // as an IR pass, and introduce explicit variables to
- // replace any "phi nodes." This would avoid possible
- // complications if we ever end up in the bad case where
- // one of the block arguments on a branch is also
- // a parameter of the target block, so that the order
- // of operations is important.
- //
- case kIROp_unconditionalBranch:
- {
- auto t = (IRUnconditionalBranch*)terminator;
- UInt argCount = t->getOperandCount();
- static const UInt kFixedArgCount = 1;
- emitPhiVarAssignments(
- argCount - kFixedArgCount,
- t->getOperands() + kFixedArgCount,
- t->getTargetBlock());
- }
- break;
- case kIROp_loop:
- {
- auto t = (IRLoop*) terminator;
- UInt argCount = t->getOperandCount();
- static const UInt kFixedArgCount = 3;
- emitPhiVarAssignments(
- argCount - kFixedArgCount,
- t->getOperands() + kFixedArgCount,
- t->getTargetBlock());
-
- }
- break;
}
// If the terminator required a full region to represent
@@ -2666,26 +2418,6 @@ void CLikeSourceEmitter::emitEntryPointAttributes(IRFunc* irFunc, IREntryPointDe
emitEntryPointAttributesImpl(irFunc, entryPointDecor);
}
-void CLikeSourceEmitter::emitPhiVarDecls(IRFunc* func)
-{
- // We will skip the first block, since its parameters are
- // the parameters of the whole function.
- auto bb = func->getFirstBlock();
- if (!bb)
- return;
- bb = bb->getNextBlock();
-
- for (; bb; bb = bb->getNextBlock())
- {
- for (auto pp = bb->getFirstParam(); pp; pp = pp->getNextParam())
- {
- emitTempModifiers(pp);
- emitType(pp->getFullType(), getName(pp));
- m_writer->emit(";\n");
- }
- }
-}
-
void CLikeSourceEmitter::emitFunctionBody(IRGlobalValueWithCode* code)
{
// Compute a structured region tree that can represent
@@ -2782,10 +2514,6 @@ void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
m_writer->emit("\n{\n");
m_writer->indent();
- // HACK: forward-declare all the local variables needed for the
- // parameters of non-entry blocks.
- emitPhiVarDecls(func);
-
// Need to emit the operations in the blocks of the function
emitFunctionBody(func);
diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h
index 25bafecf9..109731566 100644
--- a/source/slang/slang-emit-c-like.h
+++ b/source/slang/slang-emit-c-like.h
@@ -321,13 +321,6 @@ public:
void emitLayoutSemantics(IRInst* inst, char const* uniformSemanticSpelling = "register");
- // When we are about to traverse an edge from one block to another,
- // we need to emit the assignments that conceptually occur "along"
- // the edge. In traditional SSA these are the phi nodes in the
- // target block, while in our representation these use the arguments
- // to the branch instruction to fill in the parameters of the target.
- void emitPhiVarAssignments(UInt argCount, IRUse* args, IRBlock* targetBlock);
-
/// Emit high-level language statements from a structured region.
void emitRegion(Region* inRegion);
@@ -339,8 +332,6 @@ public:
void emitEntryPointAttributes(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor);
- void emitPhiVarDecls(IRFunc* func);
-
/// Emit high-level statements for the body of a function.
void emitFunctionBody(IRGlobalValueWithCode* code);
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 93bc5c979..37b8bfbce 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1921,10 +1921,6 @@ void CPPSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
m_writer->emit("\n{\n");
m_writer->indent();
- // HACK: forward-declare all the local variables needed for the
- // parameters of non-entry blocks.
- emitPhiVarDecls(func);
-
// Need to emit the operations in the blocks of the function
emitFunctionBody(func);
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index 1b94c588b..0539f9d1d 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -10,6 +10,7 @@
#include "slang-ir-collect-global-uniforms.h"
#include "slang-ir-dce.h"
#include "slang-ir-dll-import.h"
+#include "slang-ir-eliminate-phis.h"
#include "slang-ir-entry-point-uniforms.h"
#include "slang-ir-entry-point-raw-ptr-params.h"
#include "slang-ir-explicit-global-context.h"
@@ -731,39 +732,54 @@ Result linkAndOptimizeIR(
lowerBitCast(targetRequest, irModule);
simplifyIR(irModule);
- // TODO(JS): We probably want to add a pass that moves phi-node temporaries to
- // IR.
- //
- // Currently these are added as part of emit in
- // emitPhiVarAssignments and emitPhiVarDecls
- //
- // A possible mechanism might be:
- // 1) Find all of the parameters passed between blocks
- // 2) Make a variable for each one of them
- // This could be at the scope for the function, or more ideally a scope that is 'most appropriate' for how the parameter is passed
- // ie the closest scope such that the variable is in scope across the branch.
- // 3) Replace all uses of the parameters passed into a block (except the entry block), with the temporary
- // 3a) Remove the parameters from the start of a block (other than the entry block)
- // 4) For all of the branches in a function
- // 4a) For each parameter passed in the branch, assign to the temporary
- // 4b) Replace the branch with a branch that has no parameters
//
- // This should lead to an equivalent function, where the parameter passing between blocks is removed, and all the temporaries
- // are explicit in the output.
- //
- // I guess there could be a desire to combine the liveness tracking into this pass, because once a phi-temporary has been moved
- // we have lost(?) information about liveness. That could potentially be recovered, but for the phi-temporaries, their
- // initial liveness is trivial, it's when the assignment takes place, at the branch point.
- //
- // If all the temporaries were marked as such, then this would be fairly trivial to recreate.
-
- // TODO(JS): Without a pass to make all variables (including phi ones), the liveness tracking can't track everything
+ // Downstream targets may benefit from having live-range information for
+ // local variables, and our IR currently encodes a reasonably good version
+ // of that information. At this point we will insert live-range markers
+ // for local variables, on when such markers are requested.
+ //
+ // After this point in optimization, any passes that introduce new
+ // temporary variables into the IR module should take responsibility for
+ // producing their own live-range information.
+ //
if (codeGenContext->shouldTrackLiveness())
{
addLivenessTrackingToModule(irModule);
dumpIRIfEnabled(codeGenContext, irModule, "LIVENESS");
+ }
+
+ // As a late step, we need to take the SSA-form IR and move things *out*
+ // of SSA form, by eliminating all "phi nodes" (block parameters) and
+ // introducing explicit temporaries instead. Doing this at the IR level
+ // means that subsequent emit logic doesn't need to contend with the
+ // complexities of blocks with parameters.
+ //
+ eliminatePhis(codeGenContext, irModule);
+#if 0
+ dumpIRIfEnabled(codeGenContext, irModule, "PHIS ELIMINATED");
+#endif
+ // TODO: We need to insert the logic that fixes variable scoping issues
+ // here (rather than doing it very late in the emit process), because
+ // otherwise the `applyGLSLLiveness()` operation below wouldn't be
+ // able to see the live-range information that pass would need to add.
+ // For now we are avoiding that problem by simply *not* emitting live-range
+ // information when we fix variable scoping later on.
+
+ // Depending on the target, certain things that were represented as
+ // single IR instructions will need to be emitted with the help of
+ // function declaratons in output high-level code.
+ //
+ // One example of this is the live-range information, which needs
+ // to be output to GLSL code that uses a glslang extension for
+ // supporting function declarations that map directly to SPIR-V opcodes.
+ //
+ // We execute a pass here to transform any live-range instructions
+ // in the module into function calls, for the targets that require it.
+ //
+ if (codeGenContext->shouldTrackLiveness())
+ {
if (isKhronosTarget(targetRequest))
{
applyGLSLLiveness(irModule);
diff --git a/source/slang/slang-ir-eliminate-phis.cpp b/source/slang/slang-ir-eliminate-phis.cpp
new file mode 100644
index 000000000..2891376eb
--- /dev/null
+++ b/source/slang/slang-ir-eliminate-phis.cpp
@@ -0,0 +1,894 @@
+// slang-ir-eliminate-phis.cpp
+#include "slang-ir-eliminate-phis.h"
+
+// This file implements a pass to take code in the Slang IR out out SSA form
+// by eliminating all "phi nodes."
+//
+// The Slang IR does not represent phi operations in the "textbook" way, and
+// it is important to understand how it *does* represent those operations in
+// order to understand this pass.
+//
+// In a textbook encoding of SSA, a basic block begins with zero or more `phi`
+// instructions:
+//
+// block B:
+// let x = phi(a, b);
+// let y = phi(c, d);
+// ...
+//
+// Each `phi` operation has a number of operands equal to the number of
+// predecessors of `B`, with one operand corresponding to each predecessor.
+// Semantically, we usually think of a `phi` as auto-magically selecting between
+// its operands beased on the control-flow path that was used to arrive at `B`.
+// Note, however, that all of the `phi` operations at the start of a block must
+// be understood as executing *simultaneously*, so that if `x` or `y` above was
+// used as a `phi` operand, they would yield their value from *before* the binding
+// not after. Oh, and also the operands of `phi` instructions are the *one* place
+// where the rule about how "defs must dominate uses" isn't upheld.
+//
+// Our encoding of the same thing is equivalent, more consistent, and in most ways
+// easier to understand. A basic block in the Slang IR can have *parameters*,
+// and a branch to such a block must pass *arguments* for those parameters:
+//
+// block B(x,y):
+// ...
+//
+// block M:
+// ...
+// br B(a, c);
+//
+// block N:
+// ...
+// br B(b, d);
+//
+// Note how in this formulation there is no auto-magical behavior. The relationship
+// between a predecessor block and the values that `x, y` take on is clear. The
+// way that `x, y` take on their new values simulataneously is also more apparent
+// (since it intuitively matches a recursive function call). Further, our IR is
+// able to follow the "def dominates use" rule more completely.
+//
+// With that out of the way, let's dive into the pass itself.
+
+#include "slang-ir.h"
+#include "slang-ir-insts.h"
+#include "slang-ir-dominators.h"
+
+namespace Slang {
+
+struct PhiEliminationContext
+{
+ // We are going to make some effort to re-use intermediate structures across
+ // different functions that we process, so that we don't do more allocation
+ // than necessary.
+ //
+ // At the top level, our pas needs to have access to the IR module, and needs
+ // a builder it can use to generate code.
+ //
+ CodeGenContext* m_codeGenContext = nullptr;
+ IRModule* m_module = nullptr;
+ SharedIRBuilder m_sharedBuilder;
+ IRBuilder m_builder;
+
+ PhiEliminationContext(CodeGenContext* codeGenContext, IRModule* module)
+ : m_codeGenContext(codeGenContext)
+ , m_module(module)
+ , m_sharedBuilder(module)
+ , m_builder(m_sharedBuilder)
+ {}
+
+ // We start with the top-down logic of the pass, which is to process
+ // the functions in the module one at a time.
+ //
+ // Note that global variables are also included here, since they are
+ // also `IRGlobalValueWithCode`s, but we don't typically expect them
+ // to have more than one basic block, much less phis.
+ //
+ void eliminatePhisInModule()
+ {
+ for (auto inst : m_module->getGlobalInsts())
+ {
+ switch (inst->getOp())
+ {
+ default:
+ continue;
+
+ case kIROp_Func:
+ case kIROp_GlobalVar:
+ break;
+ }
+
+ auto code = (IRGlobalValueWithCode*)inst;
+ eliminatePhisInFunc(code);
+ }
+ }
+
+ // Within a single function, we are primarily concerned with processing
+ // each of its blocks.
+ //
+ void eliminatePhisInFunc(IRGlobalValueWithCode* func)
+ {
+ initializePerFuncState(func);
+
+ // The first block in a function is always the entry block,
+ // and its parameters are different than those of the other blocks;
+ // they represent the parameters of the *function*. We therefore
+ // need to skip the first block.
+ //
+ auto firstBlock = func->getFirstBlock();
+ for (auto block : func->getBlocks())
+ {
+ if (block == firstBlock)
+ {
+ continue;
+ }
+
+ eliminatePhisInBlock(block);
+ }
+ }
+
+ // In order to facilitate breaking things down into subroutines, we use a
+ // member variable to track the function currently being processed, and
+ // also a dominator tree for that function.
+ //
+ IRGlobalValueWithCode* m_func = nullptr;
+ RefPtr<IRDominatorTree> m_dominatorTree;
+
+ // Because we use the same `PhiEliminationContext` to process all of
+ // the functions in a module, we need to set up these per-function
+ // state variables for each new function encountered.
+ //
+ void initializePerFuncState(IRGlobalValueWithCode* func)
+ {
+ m_func = func;
+ m_dominatorTree = nullptr;
+ }
+
+ // The dominator tree for the function is computed on demand and
+ // cached. We do this to avoid the expensive of allocating the
+ // `IRDominatorTree` structure in cases where a function doesn't
+ // end up having any phis that need elimination. Note that any
+ // "straight-line" function taht doesn't involve control flow
+ // will never have any phis, so we expect that case to be common.
+ //
+ IRDominatorTree* getDominatorTree()
+ {
+ if (!m_dominatorTree)
+ {
+ m_dominatorTree = computeDominatorTree(m_func);
+ }
+ return m_dominatorTree;
+ }
+
+ // The meat of the work happens on a per-basic-block basis.
+ //
+ void eliminatePhisInBlock(IRBlock* block)
+ {
+ // We start by checking if the block has any parameters.
+ // If it doesn't then there is nothing to eliminate.
+ //
+ if( !block->getFirstParam() )
+ {
+ return;
+ }
+
+ // Once the early-exit case has been dealt with, the overall
+ // process amounts to three simple steps.
+ //
+ // 1. Create a temporary corresponding to each of the
+ // parameters of `block`.
+ //
+ createTempsForParams(block);
+ //
+ // 2. For each predecessor of `block`, eliminate the arguments
+ // it passes, by assigning them to the temporaries.
+ //
+ emitAssignmentsInPredecessors(block);
+ //
+ // 3. Replace all (remaining) uses of the block parameters with
+ // loads from the temporaries.
+ //
+ replaceParamUsesWithTemps();
+ }
+
+ // We need to record information about the parameters and the temporaries
+ // we create for them, so that subsequent steps can easily access them.
+ //
+ struct ParamInfo
+ {
+ IRParam* param = nullptr;
+ IRVar* temp = nullptr;
+
+ // We track one additional field for each parameter, which is
+ // used to record its "current" value for the purposes of
+ // emitting assignments at the end of a predecessor block.
+ //
+ // The `currentVal` field will either be the same as `param`
+ // itself (if using the parameter directly is still safe) or
+ // a value loaded from `temp`, after the point where this
+ // parameter has been assigned to.
+ //
+ IRInst* currentVal = nullptr;
+ };
+
+ // We build a mapping from block parameters to their indices,
+ // which makes it convenient to look up whether a given `IRInst*`
+ // is a parameter and, if so, get its index in a single operation.
+ //
+ Dictionary<IRInst*, Index> mapParamToIndex;
+
+ void createTempsForParams(IRBlock* block)
+ {
+ // The temporaries used to replace the parameters of `block`
+ // must be read-able any where that the parameters were accessible.
+ // They must also be write-able at every point that branches
+ // *into* `block`. The most narrowly-scoepd place that meets both
+ // of those criteria is the *immediate dominator* of `block`.
+ //
+ if (auto blockForTemps = getDominatorTree()->getImmediateDominator(block))
+ {
+ // We will insert our new teporary variables at the *end* of the
+ // immediate dominator block, right before the terminator. In
+ // the case where the immediate dominator is also one of the
+ // predecessors of `block`, that terminator will branch to `block`,
+ // and we need the temporary variables to be in scope there, but
+ // no earlier.
+ //
+ auto terminator = blockForTemps->getTerminator();
+ SLANG_ASSERT(terminator);
+ m_builder.setInsertBefore(terminator);
+ }
+ else
+ {
+ // There are two cases where a `block` would not have an immedidate
+ // dominator. The first is that is that it is the entry block of
+ // its function, but we already skipped over such blocks earlier.
+ // The second case is that `block` is unreachable.
+ //
+ // In the case of an unreachable block, it doesn't especially
+ // matter what we do. In principle we could leave such blocks
+ // as-is and expect later steps to ignore tham and/or their
+ // parameters.
+ //
+ // In an attempt to make this code as robust as possible, we
+ // will handle any unreachable blocks by inserting the
+ // temporary variables right after the parameters (which means
+ // right *before* the ordinary body instructions).
+ //
+ // Note that nothing in the code here will *initialize* those
+ // temporaries, so if the unreachable code were to somehow
+ // get executed, the values would be undefined.
+ //
+ auto firstOrdinaryInst = block->getFirstOrdinaryInst();
+ SLANG_ASSERT(firstOrdinaryInst);
+ m_builder.setInsertBefore(firstOrdinaryInst);
+ }
+
+ // Now that we've set up the IR builder for inserting our
+ // temporaries, we are going to iterate over the parameters
+ // and create a temporary for each. Along the way we will
+ // be building up auxilliary data structures that the
+ // subsequent steps will make use of.
+ //
+ mapParamToIndex.Clear();
+ phiInfos.clear();
+ Count paramCounter = 0;
+ for (auto param : block->getParams())
+ {
+ Index paramIndex = paramCounter++;
+ mapParamToIndex.Add(param, paramIndex);
+
+ // Note that the `emitVar` operation expects to be passed the
+ // type *stored* in the variable, but the IR `var` instruction
+ // itself will have a pointer type. Thus if `param` has type
+ // `T`, then `temp` will have type `T*`.
+ //
+ auto temp = m_builder.emitVar(param->getDataType());
+ //
+ // Because we will be eliminating the paramter, we can transfer
+ // any decorations that were added to it (notably any name hint)
+ // to the temporary that will replace it.
+ //
+ param->transferDecorationsTo(temp);
+
+ // The other main auxilliary sxtructure is used to track
+ // both per-parameter information (which we can fill in
+ // here) and information about each value *assigned* to
+ // a parameter at a branch site. Both kinds of information
+ // are stored in the same array, but we only initialize the
+ // relevant fields here.
+ //
+ PhiInfo phiInfo;
+ auto& paramInfo = phiInfo.param;
+ paramInfo.param = param;
+ paramInfo.temp = temp;
+ phiInfos.add(phiInfo);
+ }
+ }
+
+
+ // The work of emitting assignments to our temporaries in
+ // the predecessors of `block` is really a per-predecessor task.
+ //
+ void emitAssignmentsInPredecessors(IRBlock* block)
+ {
+ // The only interesting detail at this level is that
+ // we need to work with a *copy* of the predecessor
+ // list. Our manipulation replaces the branch instruction
+ // at the end of each predecessor by adding a new one and
+ // removing the old. The addition/removing of branch
+ // instructions causes the predecessor list of `block` to
+ // be mutated, even if its contents end up the same.
+ //
+ List<IRBlock*> predecessors;
+ for (auto pred : block->getPredecessors())
+ {
+ predecessors.add(pred);
+ }
+ for (auto pred : predecessors)
+ {
+ emitAssignmentsInPredecessor(pred);
+ }
+ }
+
+ // We will put off discussion of `emitAssignmentsInPredecessor()`
+ // for now, because it is the thorniest part of the problem.
+ //
+ // Instead, let us look at the far simpler task of eliminating
+ // all the *other* uses of block parameters, after the branches
+ // have been dealt with.
+ //
+ void replaceParamUsesWithTemps()
+ {
+ for(auto& phiInfo : phiInfos)
+ {
+ auto& paramInfo = phiInfo.param;
+ auto param = paramInfo.param;
+ auto temp = paramInfo.temp;
+
+ // We will repeatedly replace whatever the *first*
+ // use of `param` is, until there are no more uses
+ // left. Iterating in this fashion avoids any
+ // problems that would arise from trying to traverse
+ // the list of uses while also modifying it.
+ //
+ while (auto use = param->firstUse)
+ {
+ // We emit a distinct `load` from the temporary
+ // right before each instruction that uses the
+ // parameter. We do this to minimize the number
+ // of temporaries/copies that are created in
+ // the emit logic for high-level-language targets.
+ //
+ // We have logic that can "fold" a `load` instruction
+ // into a use site such that it shows up as an ordinary
+ // variable reference, but this logic currently only
+ // applies if there are no `store`s or other operations
+ // with possible side effects between the `load` and
+ // the place where it gets used.
+ //
+ // An alternative implementation of this pass might
+ // `load` each of our temporaries once, at the top of
+ // `block`, and then use that same value at all use sites.
+ //
+ auto user = use->getUser();
+ m_builder.setInsertBefore(user);
+ auto newVal = m_builder.emitLoad(temp);
+ use->set(newVal);
+ }
+
+ // Once we've replaced all its uses, there is no need
+ // to keep `param` around.
+ //
+ param->removeAndDeallocate();
+ }
+ }
+
+ // Now it is time to get back to `emitAssignmentsInPredecessor()`.
+ //
+ // As discussed in `replaceParamUsesWithTemps()`, we want to avoid
+ // emitting high-level-language output code with unnecessary copies.
+ // That goal makes the process of emitting assignments to our
+ // temporarites in the predecessors of `block` more challenging.
+ //
+ // To understand the challenge, consider a block like:
+ //
+ // block B(x,y):
+ // ...
+ //
+ // and a branch to that block of the form:
+ //
+ // br B(y, x);
+ //
+ // The phi operations here are effectively swapping `x` and `y`,
+ // so we know that output code will need at least *one*
+ // intermediate copy to do the job.
+ //
+ // We don't want to see output like:
+ //
+ // // br B(y,x);
+ // x = y;
+ // y = x;
+ //
+ // but we also don't want to see more copies then necessary:
+ //
+ // // br B(y,x);
+ // auto tmp_x = x;
+ // auto tmp_y = y;
+ // x = tmp_y;
+ // y = tmp_x;
+ //
+ // Our goal is emit a strictly *minimal* number of copies.
+ //
+ // In order to solve the problem, we need to track some information
+ // on a per-branch-site basis. The most obvious of this is information
+ // about each argument that the branch pasess to a corresponding
+ // block parameter.
+ //
+ struct ArgInfo
+ {
+ // We track the original argument value that was passed at the branch.
+ //
+ IRInst* originalVal = nullptr;
+
+ // At a branch site, we can consider that the goal is to assign
+ // each argument (`ArgInfo`) to the temporary for a corresponding
+ // parameter (`ParamInfo`).
+ //
+ // The problematic cases arise when an argument value is itself
+ // a reference to a block parameter (e.g., in the `(y,x) -> (x,y)`
+ // case). We thus track whether or not `originalVal` above is
+ // itself a block parameter and, if so, what the index of that
+ // parameter is.
+ //
+ Index paramIndex = kInvalidIndex;
+
+ // When there is a cyclic dependency between arguments at
+ // a branch site, no sequence of plain assignments (without
+ // additional copies) will suffice.
+ //
+ // When we end up having to break cycles, we do so by loading
+ // a copy of the value in one of the per-parameter temporaries.
+ // Any subsequent branch arguments that referenced the parameter
+ // will need to use that copy instead.
+ //
+ // In order to make sure that we properly reference the loaded
+ // copy instead of the original argument in such cases, we use
+ // a pointer field for each argument that points to a location
+ // where the up-to-date value can be found.
+ //
+ // For most arguments this will always just point to `originalVal`,
+ // but for arguments that refer to a block parameter, this will
+ // point to the `currentVal` field of the corresponding `ParamInfo`.
+ //
+ IRInst** currentValPtr = nullptr;
+ };
+ enum { kInvalidIndex = -1 };
+
+ // A lot of the logic in this pass is concerned with the process of
+ // emitting *assignments* from branch arguments to block parameters.
+ // Those assignments are implicit in our SSA IR, but this pass needs
+ // to make them explicit.
+ //
+ // In order to emit assignments in an order that minimizes the number
+ // of temporaries/copies that appear in output code, we need a way
+ // to track which assignments have been done, which are ready to be done,
+ // and which are "blocked" for some reason. We thus associate each
+ // assignment with an integer _state_ which is in one of three cases:
+ //
+ // * _done_ (`-1`): any instructions needed for the assignment have been emitted
+ //
+ // * _ready_ (`0`): the assignment can be emitted without causing any problems
+ //
+ // * _blocked_ (`N > 0`): the assignment cannot be emitted yet, because there
+ // are `N` other not-yet-done assignments that need to read the value of the
+ // parameter that this assignment wants to write. The reads need to be able
+ // to proceed before the write can go through.
+ //
+ enum
+ {
+ kState_Done = -1,
+ kState_Ready = 0,
+ };
+
+ // There is a one-to-one correspondance between:
+ //
+ // * The phis/parameters of a particular `block`
+ // * The arguments passed at some branch to `block`
+ // * The assignments that need to be performed at that branch site
+ //
+ // We thus use a single structure to track all of that information,
+ // which is handy but also requires careful thought at use sites
+ // about which version of the information is relevant.
+ //
+ struct PhiInfo
+ {
+ ParamInfo param;
+ ArgInfo arg;
+ Count state = kState_Ready;
+ };
+ List<PhiInfo> phiInfos;
+ Count getParamCount() { return phiInfos.getCount(); }
+
+ void initializeAssignmentInfo(IRUnconditionalBranch* branch, Index assignmentIndex)
+ {
+ // Each assignment is a request to write the value of
+ // some `srcArg` to some `dstParam`.
+ //
+ auto& assignment = phiInfos[assignmentIndex];
+ auto& srcArg = assignment.arg;
+ auto& dstParam = assignment.param;
+
+ // The actual argument values can be read off of
+ // the branch instruction.
+ //
+ auto srcArgVal = branch->getArg(assignmentIndex);
+ srcArg.originalVal = srcArgVal;
+ srcArg.currentValPtr = &srcArg.originalVal;
+
+ // The parameters have largely been initialized in the
+ // per-block logic, but we do need to (re-)initialize
+ // the `currentVal` field to get it ready for a new
+ // sequence of assignments.
+ //
+ dstParam.currentVal = dstParam.param;
+
+ // The main challenges arise when the argument value
+ // for an assignment is itself one of the parameters
+ // of the destination block.
+ //
+ // We can check if `srcArgVal` is a parameter using
+ // the map we pre-computed.
+ //
+ Index srcParamIndex = kInvalidIndex;
+ mapParamToIndex.TryGetValue(srcArgVal, srcParamIndex);
+ srcArg.paramIndex = srcParamIndex;
+
+ if (srcParamIndex != kInvalidIndex)
+ {
+ // In the case where the source *is* a parameter,
+ // we may not be able to use the source parameter value
+ // directly for this assignment, since we might
+ // need to make a scratch copy of it.
+ //
+ // To be able to keep up with such changes if they
+ // occur, we will update `currentValPtr` to point
+ // to the `currentVal` of the source parameter.
+ //
+ auto& srcParamInfo = phiInfos[srcParamIndex].param;
+ srcArg.currentValPtr = &srcParamInfo.currentVal;
+ }
+
+ // One very special case is when the source value to be assigned
+ // to a destination phi/parameter is the exact same phi/parameter.
+ // In such a case there is nothing that needs to be done, and we
+ // can consider the assignment fully handled.
+ //
+ if (srcParamIndex == assignmentIndex)
+ {
+ assignment.state = kState_Done;
+ }
+ else
+ {
+ // Otherwise we start out assuming that the assignment is ready
+ // to proceed, and then check to see whether it should be
+ // blocked as part of the next loop.
+ //
+ assignment.state = kState_Ready;
+ }
+ }
+
+ void checkIfAssignmentBlocksAnother(Index assignmentIndex)
+ {
+ // We can skip any case where this assignment has already been
+ // performed/resolved, since it cannot lead to anything being
+ // blocked waiting on it.
+ //
+ auto& assignment = phiInfos[assignmentIndex];
+ if (assignment.state == kState_Done)
+ {
+ return;
+ }
+
+ // We only care about cases where the source/argument for this
+ // assignment is another parameter.
+ //
+ auto& srcArg = assignment.arg;
+ auto srcParamIndex = srcArg.paramIndex;
+ if (srcParamIndex == kInvalidIndex)
+ {
+ return;
+ }
+
+ // Note that the sticky case of a parameter that refers to itself
+ // was already detected and handled in the previous loop.
+ //
+ SLANG_ASSERT(srcParamIndex != assignmentIndex);
+ //
+ // In fact, it is possible that the assignment for `srcParamIndex`
+ // has already been completed, since it was one of the self-referential
+ // cases. If that's true and the assignment is already marked as
+ // done, there is no reason to try and block it.
+ //
+ auto& srcParamAssignment = phiInfos[srcParamIndex];
+ if (srcParamAssignment.state == kState_Done)
+ {
+ return;
+ }
+
+ // Otherwise we are in exactly the case we are looking for.
+ // This `assignment` is of the form:
+ //
+ // temps[...] = temps[srcParamIndex];
+ //
+ // and there is another assignment of the form:
+ //
+ // temps[srcParamIndex] = ...;
+ //
+ // That we cannot allow to proceed until after *this* assignment
+ // has been allowed to read from the temporary for `srcParamIndex`.
+ //
+ // The representation of both the _ready_ and _blocked_ states
+ // is equal to the number of "blockers," so in either case we can
+ // increment the `state` in order to add in a(nother) blocker.
+ //
+ srcParamAssignment.state++;
+ }
+
+ void emitAssignmentsInPredecessor(IRBlock* pred)
+ {
+ // Given the way our IR is structured, we have an invariant that the
+ // predecessor block *must* end with an unconditional branch (as they
+ // are the only kinds of branches allowed to carry arguments).
+ //
+ auto branch = cast<IRUnconditionalBranch>(pred->getTerminator());
+ SLANG_ASSERT(branch);
+
+ // The predecessor block must pass the expected number of arguments
+ // to `block`, or the IR has been invalidated in some previous pass.
+ //
+ auto paramCount = getParamCount();
+ Count argCount = branch->getArgCount();
+ SLANG_ASSERT(argCount == paramCount);
+
+ // We are going to emit a sequence of assignments that write the
+ // arguments of `branch` to the temporaries for the parameters of
+ // `block`. All of these assignments have to be the last thing we
+ // do before the branch.
+ //
+ m_builder.setInsertBefore(branch);
+
+ // Our first order of business is to initialize the per-branch-site
+ // and per-argument/-assignment information, so that it is correct
+ // for `branch`.
+ //
+ for (Index assignmentIndex = 0; assignmentIndex < argCount; ++assignmentIndex)
+ {
+ initializeAssignmentInfo(branch, assignmentIndex);
+ }
+
+ // We can now scan through our assignments and try to determine
+ // which of them are _blocked_.
+ //
+ // A assignment of `param_i <- arg_i` is blocked if there
+ // exists some other not-yet-done assignment of the form
+ // `param_j <- param_i`. That is, an assignment is blocked
+ // when the parameter it wants to write to is being used as
+ // the source/argument for some other assignment (that has not
+ // yet been completed).
+ //
+ // We can thus loop over the assignments and for each one see
+ // if it is in the form `param_j <- param_i` for some `i` and,
+ // if so, tell the assignment for `param_i <- ....` that it is
+ // blocked.
+ //
+ for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex)
+ {
+ checkIfAssignmentBlocksAnother(assignmentIndex);
+ }
+
+ // Once we have identified the cases where one assignment is
+ // blocked on another, we can scan through the list of assignments
+ // and try to perform any assignmenst that are in the _ready_ state,
+ // as well as any additional assignments that become _ready_ as a result.
+ //
+ for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex)
+ {
+ tryPerformParamAssignment(assignmentIndex);
+ }
+
+ // The only assignments that could not be completed in the previous
+ // loop would be those that are part of a dependency cycle.
+ // We make one more loop over the assignments, and this time we will
+ // ensure that the assignment gets to the _done_ state, even if doing
+ // so requires loading a copy of one of our temporaries.
+ //
+ for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex)
+ {
+ completeAssignmentUsingCopyIfNeeded(assignmentIndex);
+ }
+
+ // Once we are sure all the assignment operations have been performed,
+ // we can set about replacing the unconditional branch itself.
+ //
+ replaceBranch(branch);
+ }
+
+ // Replacing the branch instruction at the end of a predecessor block
+ // is relatively simple, and just a bit of busy-work.
+ //
+ void replaceBranch(IRUnconditionalBranch* oldBranch)
+ {
+ // When creating a replacement instruction here, we need to make sure
+ // that we keep all the operands that weren't phi arguments.
+ //
+ Count oldOperandCount = oldBranch->getOperandCount();
+ Count paramCount = getParamCount();
+ Count newOperandCount = oldOperandCount - paramCount;
+
+ // There are currently two different opcodes that map to unconditional
+ // branches, with different numbers of operands before the phi-related
+ // arguments:
+ //
+ // unconditionalBranch(TargetBlock, arg0, arg1, arg2, ...);
+ // loop(TargetBlock, BreakBlock, ContinueBlock, arg0, arg1, arg2, ...);
+ //
+ // In either case, there is a constant bound on the number of non-phi
+ // operands.
+ //
+ static const Count kMaxNewOperandCount = 3;
+ SLANG_ASSERT(newOperandCount <= kMaxNewOperandCount);
+
+ IRInst* newOperands[kMaxNewOperandCount] = {};
+ for (Index i = 0; i < newOperandCount; ++i)
+ {
+ newOperands[i] = oldBranch->getOperand(i);
+ }
+
+ auto newBranch = m_builder.emitIntrinsicInst(
+ oldBranch->getFullType(),
+ oldBranch->getOp(),
+ newOperandCount,
+ newOperands);
+ oldBranch->transferDecorationsTo(newBranch);
+
+ // TODO: We could consider just modifying `branch` in-place by clearing
+ // the relevant operands for the phi arguments and setting its operand
+ // count to a lower value.
+ //
+ oldBranch->removeAndDeallocate();
+ }
+
+ // The most subtle bit of logic, which relies on the data structures
+ // we have built so far, is the way we attempt to perform assignments
+ // that have become ready.
+ //
+ void tryPerformParamAssignment(Index firstAssignmentIndex)
+ {
+ // Performing one assignment may lead to another being unblocked,
+ // and entering the _ready_ state, in which case we wnat to perform
+ // *that* assignment, which may unblock another, etc.
+ //
+ // We thus use a loop and keep processing assignments as they
+ // become unblocked, until we run out of work.
+ //
+ Index assignmentIndex = firstAssignmentIndex;
+ for (;;)
+ {
+ auto& assignment = phiInfos[assignmentIndex];
+ if (assignment.state != kState_Ready)
+ {
+ // if the assignment we are looking at isn't ready to
+ // perform, we have reached the end of a chain of
+ // unblocked depdendencies (perhaps even a chain of zero).
+ //
+ return;
+ }
+
+ // When we have an assignment that is ready to perform,
+ // we do so by storing the value of the corresponding
+ // argument into the temporary for the coresponding
+ // parameter.
+ //
+ // Note that we use `actualValPtr` here instead of `originalVal`,
+ // so that any logic that might have moved another parameter
+ // into a temporary will influence our result.
+ //
+ auto& dstParam = assignment.param;
+ auto& srcArg = assignment.arg;
+ m_builder.emitStore(dstParam.temp, *srcArg.currentValPtr);
+ //
+ // Once the store is emitted, the assignment has been performed,
+ // and it can move to the _done_ state.
+ //
+ assignment.state = kState_Done;
+
+ // If the source of this assignment as itself a block
+ // parameter, then we may need to unblock the assignment
+ // for that parameter.
+ //
+ // If the source *isn't* a parameter, then there is nothing
+ // to unblock, and we've reached the end of a chain.
+ //
+ auto srcParamIndex = srcArg.paramIndex;
+ if (srcParamIndex == kInvalidIndex)
+ {
+ return;
+ }
+
+ // If the source *is* a parameter, but its assignment
+ // has already been performed, then we cannot unblock it
+ // (we certainly don't want to perform it again).
+ //
+ auto& srcParamAssignment = phiInfos[srcParamIndex];
+ if (srcParamAssignment.state == kState_Done)
+ {
+ return;
+ }
+
+ // If the source parameter's assignment hasn't been
+ // done yet, then we expect that it *must* be blocked
+ // (at the very least blocked on the assignment we
+ // have just performed).
+ //
+ SLANG_ASSERT(srcParamAssignment.state != kState_Ready);
+
+ // We remove one blocker from the source.
+ //
+ srcParamAssignment.state--;
+
+ // It is possible that removing this one blocker has
+ // moved the source parameter into the _ready_ state.
+ // Rather than check that here, we can simply move
+ // back to the top of this loop and consider the
+ // assignment corresponding to the source parameter
+ // as the next in our chain.
+ //
+ assignmentIndex = srcParamIndex;
+ }
+ }
+
+ // When we want to make sure that all our assignments *definitely*
+ // get completed, we need to be willing to make a temporary copy
+ // of a branch parameter in order to unblock an assignment.
+ //
+ void completeAssignmentUsingCopyIfNeeded(Index assignmentIndex)
+ {
+ // If the assignemtn is _blocked_ because of one or more
+ // other assignments, we will unblock it by making a copy.
+ //
+ auto& assignment = phiInfos[assignmentIndex];
+ if(assignment.state > 0)
+ {
+ auto& dstParam = assignment.param;
+
+ // The assignment to `dstParam` is blocked because there
+ // exist one or more other not-yet-completed assignments
+ // where `dstParam` is being used as a source.
+ //
+ // We emit a `load` from the temporary for `dstParam` in
+ // order to make a copy, at which point we can safely
+ // perform the assignment for to the original, and allow
+ // the not-yet-completed assignments to use that copy
+ // instead, as the current value for `dstParam`.
+ //
+ dstParam.currentVal = m_builder.emitLoad(dstParam.temp);
+ assignment.state = kState_Ready;
+ }
+
+ // If this assignment *was* blocked, we have made it so
+ // that it isn't blocked any more. We thus expect that
+ // trying to perform the assignment again will definitely
+ // result it the assignment being in the _done_ state.
+ //
+ tryPerformParamAssignment(assignmentIndex);
+ SLANG_ASSERT(assignment.state == kState_Done);
+ }
+};
+
+void eliminatePhis(CodeGenContext* codeGenContext, IRModule* module)
+{
+ PhiEliminationContext context(codeGenContext, module);
+ context.eliminatePhisInModule();
+}
+
+}
diff --git a/source/slang/slang-ir-eliminate-phis.h b/source/slang/slang-ir-eliminate-phis.h
new file mode 100644
index 000000000..58690c012
--- /dev/null
+++ b/source/slang/slang-ir-eliminate-phis.h
@@ -0,0 +1,16 @@
+// slang-ir-eliminate-phis.h
+#pragma once
+
+namespace Slang
+{
+ struct CodeGenContext;
+ struct IRModule;
+
+ /// Eliminate all "phi nodes" from the given `module`.
+ ///
+ /// This process moves the code in `module` *out* of SSA form,
+ /// so that it is more suitable for emission on targets that
+ /// are not themselves based on an SSA representation.
+ ///
+ void eliminatePhis(CodeGenContext* context, IRModule* module);
+}
diff --git a/source/slang/slang-ir-restructure-scoping.cpp b/source/slang/slang-ir-restructure-scoping.cpp
index 0535483ec..dca1a92b6 100644
--- a/source/slang/slang-ir-restructure-scoping.cpp
+++ b/source/slang/slang-ir-restructure-scoping.cpp
@@ -307,24 +307,39 @@ static void fixValueScopingForInst(
// If we've gotten this far, we know that `u` is a "bad"
// use of `def`, and needs fixing.
//
- // We will create the `tmp` variable on demand, so
- // that we create it when the first "bad" use is encountered,
- // and then re-use it for subsequent bad uses.
+ // We will use a temporary variable to resolve the bad scoping,
+ // creating it on-demand when we ecounter a first "bad" use, and
+ // then re-using that temporary for any subsequent bad uses.
//
if( !tmp )
{
- // We will create a temporary to represent `def`,
- // and insert a `store` into it right after `def`.
+ // If the value is *already* a temporary variable, then
+ // we are really just trying to fix the scoping of the
+ // variable declaration itself, and the variable can
+ // effectively be its own temporary.
//
- // Note: we are inserting the new variable right
- // after `def` for now, just because we don't
- // yet know the final region that it should be
- // placed into. We will move it to the correct
- // location when we are done.
- //
- builder.setInsertBefore(def->getNextInst());
- tmp = builder.emitVar(def->getDataType());
- builder.emitStore(tmp, def);
+ if(auto varDef = as<IRVar>(def))
+ {
+ tmp = varDef;
+ }
+ else
+ {
+ // We will create a temporary to represent `def`,
+ // and insert a `store` into it right after `def`.
+ //
+ // Note: we are inserting the new variable right
+ // after `def` for now, just because we don't
+ // yet know the final region that it should be
+ // placed into. We will move it to the correct
+ // location when we are done.
+ //
+ builder.setInsertBefore(def->getNextInst());
+ tmp = builder.emitVar(def->getDataType());
+ builder.emitStore(tmp, def);
+ //
+ // Note: the lifetime for the new variable starts
+ // right after the store we have emitted.
+ }
}
// In order to know where `tmp` should be defined
@@ -345,19 +360,40 @@ static void fixValueScopingForInst(
rr);
}
- // To fix up the use `u`, we will need to change
- // it from using `def` to using a load from `tmp`
- //
- builder.setInsertBefore(user);
- IRInst* tmpVal = builder.emitLoad(tmp);
-
- // We are clobbering the value used by the `IRUse` `u`,
- // while will cut it out of the list of uses for `def`.
- // We need to be careful when doing this to not disrupt
- // our iteration of the uses of `def`, so we carefully
- // used the `nextUse` temporary at the start of the loop.
+ // We need to fix up the use `u`, but the way we fix
+ // it depends on whether we moving `def` itself (in which
+ // case `tmp` and `def` are the same), or if we have
+ // introduced an intermediate temporary.
//
- u->set(tmpVal);
+ if (def == tmp)
+ {
+ // If we are moving the definition itself, we don't
+ // need to do any kind of fix-up work at use sites.
+ }
+ else
+ {
+ // Othwerise we need to fix up the use `use` so
+ // that it uses a value loaded from `tmp` instead
+ // of `def`.
+ //
+ builder.setInsertBefore(user);
+ IRInst* tmpVal = builder.emitLoad(tmp);
+
+ // We are clobbering the value used by the `IRUse` `u`,
+ // while will cut it out of the list of uses for `def`.
+ // We need to be careful when doing this to not disrupt
+ // our iteration of the uses of `def`, so we carefully
+ // used the `nextUse` temporary at the start of the loop.
+ //
+ // Note(tfoley): This is more subtle than the comment makes
+ // it out to be, because we are *also* injecting a new use
+ // of `def` in the logic that creates `tmp` (because `def`
+ // is used as an operand to the `store` that initializes
+ // `tmp`). We really ought to work on a copy of the use-def
+ // information.
+ //
+ u->set(tmpVal);
+ }
}
// At the end of the loop, the `tmp` variable will have
@@ -421,8 +457,27 @@ void fixValueScoping(RegionTree* regionTree)
if(!parentRegion)
continue;
- for(auto inst : block->getDecorationsAndChildren())
+ // Note: This pass will end up modifying the IR while also
+ // iterating over it. As such, we need to be careful not
+ // to let our iteration logic get confused.
+ //
+ // In particular, it is possible that `inst` will get moved
+ // to another block, as a way to resolve scoping issues, and
+ // if we did not account for that result, we might end up
+ // walking to the next instruction after `inst` even though
+ // it isn't inside `block`.
+ //
+ // We defensively cache the next instruction to visit so that
+ // we can continue our iteration after `inst` even if it gets
+ // moved. For now we are confident that the operations on
+ // `inst` won't affect `nextInst`, since the pass is not supposed
+ // to move or delete any *other* instructions.
+ //
+ IRInst* nextInst = nullptr;
+ for (auto inst = block->getFirstOrdinaryInst(); inst; inst = nextInst)
{
+ nextInst = inst->getNextInst();
+
fixValueScopingForInst(inst, parentRegion, regionTree);
}
}