diff options
Diffstat (limited to 'source/slang')
| -rw-r--r-- | source/slang/slang-emit-c-like.cpp | 276 | ||||
| -rw-r--r-- | source/slang/slang-emit-c-like.h | 9 | ||||
| -rw-r--r-- | source/slang/slang-emit-cpp.cpp | 4 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 68 | ||||
| -rw-r--r-- | source/slang/slang-ir-eliminate-phis.cpp | 894 | ||||
| -rw-r--r-- | source/slang/slang-ir-eliminate-phis.h | 16 | ||||
| -rw-r--r-- | source/slang/slang-ir-restructure-scoping.cpp | 109 |
7 files changed, 1036 insertions, 340 deletions
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 06e1e782b..6b8939d85 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2081,13 +2081,8 @@ void CLikeSourceEmitter::_emitInst(IRInst* inst) case kIROp_Var: { - auto ptrType = cast<IRPtrType>(inst->getDataType()); - auto valType = ptrType->getValueType(); - - auto name = getName(inst); - emitRateQualifiers(inst); - emitType(valType, name); - m_writer->emit(";\n"); + auto var = cast<IRVar>(inst); + emitVar(var); } break; @@ -2222,212 +2217,6 @@ void CLikeSourceEmitter::emitLayoutSemantics(IRInst* inst, char const* uniformSe emitLayoutSemanticsImpl(inst, uniformSemanticSpelling); } -void CLikeSourceEmitter::emitPhiVarAssignments(UInt argCount, IRUse* args, IRBlock* targetBlock) -{ - // The basic setup here is that we are at the end of a block - // and are about to branch to `targetBlock`. The target block - // has parameters (representing the phi variables/nodes in SSA - // form), and the block we are branching from provided arguments - // (given as `args` and `argCount` here) to be passed to those - // parameters. - // - // An earlier step will have already emitted a local variable - // declaration for each phi node (block parameter), so we should - // be able to "pass" an argument to a parameter by assigning - // to the variable that represents the parameter. - // - // A naive approach would simply loop over the parameters/arguments - // in tandem and emit assignments like: - // - // <param_i> = <arg_i>; - // - // This approach has an important and known failure case, which - // can occur when one of the arguments is also one of the parameters. - // - // If we have a block like: - // - // block b( - // param x: Int, - // y: Int) - // ... - // - // and then a branch to it like: - // - // br(b, y, x); - // - // Then the naive approach for emitting the assignments for - // that branch would output: - // - // x = y; - // y = x; - // - // But that is not the semantics that the "parameter passing" - // model is meant to capture. - // - // The simplest way to restore the correct semantics would - // be to assign each of the arguments to a temporary, and - // then to assign those temporaries to the parameters: - // - // let tmp0 = y; - // let tmp1 = x; - // x = tmp0; - // y = tmp1; - // - // Adding temporaries like that unconditionally would clutter - // up the generated code, so it would be nicer to only - // introduce them when strictly necessary. - // - // A temporary should only be necessary when we have: - // - // * A parameter `destParam` of the `targetBlock` - // * Where the argument value for `destParam` is another parameter, `srcParam`, of `targetBlock` - // * And `srcParam` appears before `destParam` in the parameter list - // - // We start by looking for such cases, and collecting the - // set of block parameters that will require a defensive - // temporary. - // - // Note: an alternative approach would first look for a total - // order on the arguments/parameters such that we avoid - // the problem, and then only resort to introducing temporaries - // as a means of breaking cycles. - // - // We will track the parameters that need temporaries by - // building a dictionary mapping parameters to the name - // of the temporary they will use, if any. - // - Dictionary<IRParam*, String> mapParamToTempName; - { - // To know whether a parameter occurs earlier in - // the list than another, we will rely on a set - // of parameters we have already "seen" during - // our iteration. - // - // TODO: This is a strong candidate for using a - // hash set optimized for a small number of entries - // (and thus using a stack allocation in the common case), - // given that most blocks will only have a handful - // of parameters. - // - HashSet<IRParam*> seenParams; - - UInt argCounter = 0; - for (auto destParam = targetBlock->getFirstParam(); destParam; destParam = destParam->getNextParam()) - { - seenParams.Add(destParam); - - UInt argIndex = argCounter++; - if (argIndex >= argCount) - { - SLANG_UNEXPECTED("not enough arguments for branch"); - break; - } - IRInst* arg = args[argIndex].get(); - - // Is the argument also a parameter of the same block? - // - // If not, then we don't have to worry about this assignment. - // - IRParam* srcParam = as<IRParam>(arg); - if(!srcParam) - continue; - if(srcParam->getParent() != targetBlock) - continue; - - // Is the argument an *earlier* parameter of the same block? - // We test this by checking if we've already seen it during - // our iteration. - // - // If the parameter isn't an earlier one, then we don't have - // to worry about the order of assignment creating problems. - // - if(!seenParams.Contains(srcParam)) - continue; - - // At this point we've detected a problem case: the `dstParam` - // would naively be assigned the value of `srcParam` *after* - // we've already assigned to `srcParam`. - // - // We will need to avoid the problem by introducing a temporary - // for `srcParam`. - // - if( !mapParamToTempName.ContainsKey(srcParam) ) - { - String tempName = "tmp_" + getName(srcParam); - mapParamToTempName.Add(srcParam, tempName); - } - } - } - - // Now that we've determined which of the parameters could cause problems, - // we can do an initial round of assignments, where we move form the - // argument values into either a parameter (if there were no problems) - // or a temporary (if we needed to avoid a problem). - // - { - UInt argCounter = 0; - for (auto param = targetBlock->getFirstParam(); param; param = param->getNextParam()) - { - UInt argIndex = argCounter++; - if (argIndex >= argCount) - { - SLANG_UNEXPECTED("not enough arguments for branch"); - break; - } - IRInst* arg = args[argIndex].get(); - - auto outerPrec = getInfo(EmitOp::General); - auto prec = getInfo(EmitOp::Assign); - - String tempName; - if( mapParamToTempName.TryGetValue(param, tempName) ) - { - // This parameter was involved in a confounding assignment, - // where it was used as the argument value for a later - // parameter on this same branch/edge. - // - // We need to assign to a temporary instead of - // to the parameter itself. We will declare - // the temporary here so that the assignment - // of the argument serves as the initializer. - // - emitType(param->getFullType(), tempName); - } - else - { - // This parameter was not involved in any confounding assignments, - // so we can simply us the parameter itself as the left-hand side - // of an assignment. - // - emitOperand(param, leftSide(outerPrec, prec)); - } - - m_writer->emit(" = "); - emitOperand(arg, rightSide(prec, outerPrec)); - m_writer->emit(";\n"); - } - } - - // Finally, after all the assignments from argument values have been completed, - // we will make a cleanup pass that copies from any of the temporary variables - // we introduced over to the parameter that needed a temporary. - // - for (auto param = targetBlock->getFirstParam(); param; param = param->getNextParam()) - { - String tempName; - if( !mapParamToTempName.TryGetValue(param, tempName) ) - continue; - - auto outerPrec = getInfo(EmitOp::General); - auto prec = getInfo(EmitOp::Assign); - - emitOperand(param, leftSide(outerPrec, prec)); - m_writer->emit(" = "); - m_writer->emit(tempName); - m_writer->emit(";\n"); - } -} - void CLikeSourceEmitter::emitRegion(Region* inRegion) { // We will use a loop so that we can process sequential (simple) @@ -2478,43 +2267,6 @@ void CLikeSourceEmitter::emitRegion(Region* inRegion) // separate `Region`s for them. emitInst(terminator); break; - - // We will also handle any unconditional branches - // here, since they may have arguments to pass - // to the target block (our encoding of SSA - // "phi" operations). - // - // TODO: A better approach would be to move out of SSA - // as an IR pass, and introduce explicit variables to - // replace any "phi nodes." This would avoid possible - // complications if we ever end up in the bad case where - // one of the block arguments on a branch is also - // a parameter of the target block, so that the order - // of operations is important. - // - case kIROp_unconditionalBranch: - { - auto t = (IRUnconditionalBranch*)terminator; - UInt argCount = t->getOperandCount(); - static const UInt kFixedArgCount = 1; - emitPhiVarAssignments( - argCount - kFixedArgCount, - t->getOperands() + kFixedArgCount, - t->getTargetBlock()); - } - break; - case kIROp_loop: - { - auto t = (IRLoop*) terminator; - UInt argCount = t->getOperandCount(); - static const UInt kFixedArgCount = 3; - emitPhiVarAssignments( - argCount - kFixedArgCount, - t->getOperands() + kFixedArgCount, - t->getTargetBlock()); - - } - break; } // If the terminator required a full region to represent @@ -2666,26 +2418,6 @@ void CLikeSourceEmitter::emitEntryPointAttributes(IRFunc* irFunc, IREntryPointDe emitEntryPointAttributesImpl(irFunc, entryPointDecor); } -void CLikeSourceEmitter::emitPhiVarDecls(IRFunc* func) -{ - // We will skip the first block, since its parameters are - // the parameters of the whole function. - auto bb = func->getFirstBlock(); - if (!bb) - return; - bb = bb->getNextBlock(); - - for (; bb; bb = bb->getNextBlock()) - { - for (auto pp = bb->getFirstParam(); pp; pp = pp->getNextParam()) - { - emitTempModifiers(pp); - emitType(pp->getFullType(), getName(pp)); - m_writer->emit(";\n"); - } - } -} - void CLikeSourceEmitter::emitFunctionBody(IRGlobalValueWithCode* code) { // Compute a structured region tree that can represent @@ -2782,10 +2514,6 @@ void CLikeSourceEmitter::emitSimpleFuncImpl(IRFunc* func) m_writer->emit("\n{\n"); m_writer->indent(); - // HACK: forward-declare all the local variables needed for the - // parameters of non-entry blocks. - emitPhiVarDecls(func); - // Need to emit the operations in the blocks of the function emitFunctionBody(func); diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 25bafecf9..109731566 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -321,13 +321,6 @@ public: void emitLayoutSemantics(IRInst* inst, char const* uniformSemanticSpelling = "register"); - // When we are about to traverse an edge from one block to another, - // we need to emit the assignments that conceptually occur "along" - // the edge. In traditional SSA these are the phi nodes in the - // target block, while in our representation these use the arguments - // to the branch instruction to fill in the parameters of the target. - void emitPhiVarAssignments(UInt argCount, IRUse* args, IRBlock* targetBlock); - /// Emit high-level language statements from a structured region. void emitRegion(Region* inRegion); @@ -339,8 +332,6 @@ public: void emitEntryPointAttributes(IRFunc* irFunc, IREntryPointDecoration* entryPointDecor); - void emitPhiVarDecls(IRFunc* func); - /// Emit high-level statements for the body of a function. void emitFunctionBody(IRGlobalValueWithCode* code); diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 93bc5c979..37b8bfbce 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -1921,10 +1921,6 @@ void CPPSourceEmitter::emitSimpleFuncImpl(IRFunc* func) m_writer->emit("\n{\n"); m_writer->indent(); - // HACK: forward-declare all the local variables needed for the - // parameters of non-entry blocks. - emitPhiVarDecls(func); - // Need to emit the operations in the blocks of the function emitFunctionBody(func); diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 1b94c588b..0539f9d1d 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -10,6 +10,7 @@ #include "slang-ir-collect-global-uniforms.h" #include "slang-ir-dce.h" #include "slang-ir-dll-import.h" +#include "slang-ir-eliminate-phis.h" #include "slang-ir-entry-point-uniforms.h" #include "slang-ir-entry-point-raw-ptr-params.h" #include "slang-ir-explicit-global-context.h" @@ -731,39 +732,54 @@ Result linkAndOptimizeIR( lowerBitCast(targetRequest, irModule); simplifyIR(irModule); - // TODO(JS): We probably want to add a pass that moves phi-node temporaries to - // IR. - // - // Currently these are added as part of emit in - // emitPhiVarAssignments and emitPhiVarDecls - // - // A possible mechanism might be: - // 1) Find all of the parameters passed between blocks - // 2) Make a variable for each one of them - // This could be at the scope for the function, or more ideally a scope that is 'most appropriate' for how the parameter is passed - // ie the closest scope such that the variable is in scope across the branch. - // 3) Replace all uses of the parameters passed into a block (except the entry block), with the temporary - // 3a) Remove the parameters from the start of a block (other than the entry block) - // 4) For all of the branches in a function - // 4a) For each parameter passed in the branch, assign to the temporary - // 4b) Replace the branch with a branch that has no parameters // - // This should lead to an equivalent function, where the parameter passing between blocks is removed, and all the temporaries - // are explicit in the output. - // - // I guess there could be a desire to combine the liveness tracking into this pass, because once a phi-temporary has been moved - // we have lost(?) information about liveness. That could potentially be recovered, but for the phi-temporaries, their - // initial liveness is trivial, it's when the assignment takes place, at the branch point. - // - // If all the temporaries were marked as such, then this would be fairly trivial to recreate. - - // TODO(JS): Without a pass to make all variables (including phi ones), the liveness tracking can't track everything + // Downstream targets may benefit from having live-range information for + // local variables, and our IR currently encodes a reasonably good version + // of that information. At this point we will insert live-range markers + // for local variables, on when such markers are requested. + // + // After this point in optimization, any passes that introduce new + // temporary variables into the IR module should take responsibility for + // producing their own live-range information. + // if (codeGenContext->shouldTrackLiveness()) { addLivenessTrackingToModule(irModule); dumpIRIfEnabled(codeGenContext, irModule, "LIVENESS"); + } + + // As a late step, we need to take the SSA-form IR and move things *out* + // of SSA form, by eliminating all "phi nodes" (block parameters) and + // introducing explicit temporaries instead. Doing this at the IR level + // means that subsequent emit logic doesn't need to contend with the + // complexities of blocks with parameters. + // + eliminatePhis(codeGenContext, irModule); +#if 0 + dumpIRIfEnabled(codeGenContext, irModule, "PHIS ELIMINATED"); +#endif + // TODO: We need to insert the logic that fixes variable scoping issues + // here (rather than doing it very late in the emit process), because + // otherwise the `applyGLSLLiveness()` operation below wouldn't be + // able to see the live-range information that pass would need to add. + // For now we are avoiding that problem by simply *not* emitting live-range + // information when we fix variable scoping later on. + + // Depending on the target, certain things that were represented as + // single IR instructions will need to be emitted with the help of + // function declaratons in output high-level code. + // + // One example of this is the live-range information, which needs + // to be output to GLSL code that uses a glslang extension for + // supporting function declarations that map directly to SPIR-V opcodes. + // + // We execute a pass here to transform any live-range instructions + // in the module into function calls, for the targets that require it. + // + if (codeGenContext->shouldTrackLiveness()) + { if (isKhronosTarget(targetRequest)) { applyGLSLLiveness(irModule); diff --git a/source/slang/slang-ir-eliminate-phis.cpp b/source/slang/slang-ir-eliminate-phis.cpp new file mode 100644 index 000000000..2891376eb --- /dev/null +++ b/source/slang/slang-ir-eliminate-phis.cpp @@ -0,0 +1,894 @@ +// slang-ir-eliminate-phis.cpp +#include "slang-ir-eliminate-phis.h" + +// This file implements a pass to take code in the Slang IR out out SSA form +// by eliminating all "phi nodes." +// +// The Slang IR does not represent phi operations in the "textbook" way, and +// it is important to understand how it *does* represent those operations in +// order to understand this pass. +// +// In a textbook encoding of SSA, a basic block begins with zero or more `phi` +// instructions: +// +// block B: +// let x = phi(a, b); +// let y = phi(c, d); +// ... +// +// Each `phi` operation has a number of operands equal to the number of +// predecessors of `B`, with one operand corresponding to each predecessor. +// Semantically, we usually think of a `phi` as auto-magically selecting between +// its operands beased on the control-flow path that was used to arrive at `B`. +// Note, however, that all of the `phi` operations at the start of a block must +// be understood as executing *simultaneously*, so that if `x` or `y` above was +// used as a `phi` operand, they would yield their value from *before* the binding +// not after. Oh, and also the operands of `phi` instructions are the *one* place +// where the rule about how "defs must dominate uses" isn't upheld. +// +// Our encoding of the same thing is equivalent, more consistent, and in most ways +// easier to understand. A basic block in the Slang IR can have *parameters*, +// and a branch to such a block must pass *arguments* for those parameters: +// +// block B(x,y): +// ... +// +// block M: +// ... +// br B(a, c); +// +// block N: +// ... +// br B(b, d); +// +// Note how in this formulation there is no auto-magical behavior. The relationship +// between a predecessor block and the values that `x, y` take on is clear. The +// way that `x, y` take on their new values simulataneously is also more apparent +// (since it intuitively matches a recursive function call). Further, our IR is +// able to follow the "def dominates use" rule more completely. +// +// With that out of the way, let's dive into the pass itself. + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-dominators.h" + +namespace Slang { + +struct PhiEliminationContext +{ + // We are going to make some effort to re-use intermediate structures across + // different functions that we process, so that we don't do more allocation + // than necessary. + // + // At the top level, our pas needs to have access to the IR module, and needs + // a builder it can use to generate code. + // + CodeGenContext* m_codeGenContext = nullptr; + IRModule* m_module = nullptr; + SharedIRBuilder m_sharedBuilder; + IRBuilder m_builder; + + PhiEliminationContext(CodeGenContext* codeGenContext, IRModule* module) + : m_codeGenContext(codeGenContext) + , m_module(module) + , m_sharedBuilder(module) + , m_builder(m_sharedBuilder) + {} + + // We start with the top-down logic of the pass, which is to process + // the functions in the module one at a time. + // + // Note that global variables are also included here, since they are + // also `IRGlobalValueWithCode`s, but we don't typically expect them + // to have more than one basic block, much less phis. + // + void eliminatePhisInModule() + { + for (auto inst : m_module->getGlobalInsts()) + { + switch (inst->getOp()) + { + default: + continue; + + case kIROp_Func: + case kIROp_GlobalVar: + break; + } + + auto code = (IRGlobalValueWithCode*)inst; + eliminatePhisInFunc(code); + } + } + + // Within a single function, we are primarily concerned with processing + // each of its blocks. + // + void eliminatePhisInFunc(IRGlobalValueWithCode* func) + { + initializePerFuncState(func); + + // The first block in a function is always the entry block, + // and its parameters are different than those of the other blocks; + // they represent the parameters of the *function*. We therefore + // need to skip the first block. + // + auto firstBlock = func->getFirstBlock(); + for (auto block : func->getBlocks()) + { + if (block == firstBlock) + { + continue; + } + + eliminatePhisInBlock(block); + } + } + + // In order to facilitate breaking things down into subroutines, we use a + // member variable to track the function currently being processed, and + // also a dominator tree for that function. + // + IRGlobalValueWithCode* m_func = nullptr; + RefPtr<IRDominatorTree> m_dominatorTree; + + // Because we use the same `PhiEliminationContext` to process all of + // the functions in a module, we need to set up these per-function + // state variables for each new function encountered. + // + void initializePerFuncState(IRGlobalValueWithCode* func) + { + m_func = func; + m_dominatorTree = nullptr; + } + + // The dominator tree for the function is computed on demand and + // cached. We do this to avoid the expensive of allocating the + // `IRDominatorTree` structure in cases where a function doesn't + // end up having any phis that need elimination. Note that any + // "straight-line" function taht doesn't involve control flow + // will never have any phis, so we expect that case to be common. + // + IRDominatorTree* getDominatorTree() + { + if (!m_dominatorTree) + { + m_dominatorTree = computeDominatorTree(m_func); + } + return m_dominatorTree; + } + + // The meat of the work happens on a per-basic-block basis. + // + void eliminatePhisInBlock(IRBlock* block) + { + // We start by checking if the block has any parameters. + // If it doesn't then there is nothing to eliminate. + // + if( !block->getFirstParam() ) + { + return; + } + + // Once the early-exit case has been dealt with, the overall + // process amounts to three simple steps. + // + // 1. Create a temporary corresponding to each of the + // parameters of `block`. + // + createTempsForParams(block); + // + // 2. For each predecessor of `block`, eliminate the arguments + // it passes, by assigning them to the temporaries. + // + emitAssignmentsInPredecessors(block); + // + // 3. Replace all (remaining) uses of the block parameters with + // loads from the temporaries. + // + replaceParamUsesWithTemps(); + } + + // We need to record information about the parameters and the temporaries + // we create for them, so that subsequent steps can easily access them. + // + struct ParamInfo + { + IRParam* param = nullptr; + IRVar* temp = nullptr; + + // We track one additional field for each parameter, which is + // used to record its "current" value for the purposes of + // emitting assignments at the end of a predecessor block. + // + // The `currentVal` field will either be the same as `param` + // itself (if using the parameter directly is still safe) or + // a value loaded from `temp`, after the point where this + // parameter has been assigned to. + // + IRInst* currentVal = nullptr; + }; + + // We build a mapping from block parameters to their indices, + // which makes it convenient to look up whether a given `IRInst*` + // is a parameter and, if so, get its index in a single operation. + // + Dictionary<IRInst*, Index> mapParamToIndex; + + void createTempsForParams(IRBlock* block) + { + // The temporaries used to replace the parameters of `block` + // must be read-able any where that the parameters were accessible. + // They must also be write-able at every point that branches + // *into* `block`. The most narrowly-scoepd place that meets both + // of those criteria is the *immediate dominator* of `block`. + // + if (auto blockForTemps = getDominatorTree()->getImmediateDominator(block)) + { + // We will insert our new teporary variables at the *end* of the + // immediate dominator block, right before the terminator. In + // the case where the immediate dominator is also one of the + // predecessors of `block`, that terminator will branch to `block`, + // and we need the temporary variables to be in scope there, but + // no earlier. + // + auto terminator = blockForTemps->getTerminator(); + SLANG_ASSERT(terminator); + m_builder.setInsertBefore(terminator); + } + else + { + // There are two cases where a `block` would not have an immedidate + // dominator. The first is that is that it is the entry block of + // its function, but we already skipped over such blocks earlier. + // The second case is that `block` is unreachable. + // + // In the case of an unreachable block, it doesn't especially + // matter what we do. In principle we could leave such blocks + // as-is and expect later steps to ignore tham and/or their + // parameters. + // + // In an attempt to make this code as robust as possible, we + // will handle any unreachable blocks by inserting the + // temporary variables right after the parameters (which means + // right *before* the ordinary body instructions). + // + // Note that nothing in the code here will *initialize* those + // temporaries, so if the unreachable code were to somehow + // get executed, the values would be undefined. + // + auto firstOrdinaryInst = block->getFirstOrdinaryInst(); + SLANG_ASSERT(firstOrdinaryInst); + m_builder.setInsertBefore(firstOrdinaryInst); + } + + // Now that we've set up the IR builder for inserting our + // temporaries, we are going to iterate over the parameters + // and create a temporary for each. Along the way we will + // be building up auxilliary data structures that the + // subsequent steps will make use of. + // + mapParamToIndex.Clear(); + phiInfos.clear(); + Count paramCounter = 0; + for (auto param : block->getParams()) + { + Index paramIndex = paramCounter++; + mapParamToIndex.Add(param, paramIndex); + + // Note that the `emitVar` operation expects to be passed the + // type *stored* in the variable, but the IR `var` instruction + // itself will have a pointer type. Thus if `param` has type + // `T`, then `temp` will have type `T*`. + // + auto temp = m_builder.emitVar(param->getDataType()); + // + // Because we will be eliminating the paramter, we can transfer + // any decorations that were added to it (notably any name hint) + // to the temporary that will replace it. + // + param->transferDecorationsTo(temp); + + // The other main auxilliary sxtructure is used to track + // both per-parameter information (which we can fill in + // here) and information about each value *assigned* to + // a parameter at a branch site. Both kinds of information + // are stored in the same array, but we only initialize the + // relevant fields here. + // + PhiInfo phiInfo; + auto& paramInfo = phiInfo.param; + paramInfo.param = param; + paramInfo.temp = temp; + phiInfos.add(phiInfo); + } + } + + + // The work of emitting assignments to our temporaries in + // the predecessors of `block` is really a per-predecessor task. + // + void emitAssignmentsInPredecessors(IRBlock* block) + { + // The only interesting detail at this level is that + // we need to work with a *copy* of the predecessor + // list. Our manipulation replaces the branch instruction + // at the end of each predecessor by adding a new one and + // removing the old. The addition/removing of branch + // instructions causes the predecessor list of `block` to + // be mutated, even if its contents end up the same. + // + List<IRBlock*> predecessors; + for (auto pred : block->getPredecessors()) + { + predecessors.add(pred); + } + for (auto pred : predecessors) + { + emitAssignmentsInPredecessor(pred); + } + } + + // We will put off discussion of `emitAssignmentsInPredecessor()` + // for now, because it is the thorniest part of the problem. + // + // Instead, let us look at the far simpler task of eliminating + // all the *other* uses of block parameters, after the branches + // have been dealt with. + // + void replaceParamUsesWithTemps() + { + for(auto& phiInfo : phiInfos) + { + auto& paramInfo = phiInfo.param; + auto param = paramInfo.param; + auto temp = paramInfo.temp; + + // We will repeatedly replace whatever the *first* + // use of `param` is, until there are no more uses + // left. Iterating in this fashion avoids any + // problems that would arise from trying to traverse + // the list of uses while also modifying it. + // + while (auto use = param->firstUse) + { + // We emit a distinct `load` from the temporary + // right before each instruction that uses the + // parameter. We do this to minimize the number + // of temporaries/copies that are created in + // the emit logic for high-level-language targets. + // + // We have logic that can "fold" a `load` instruction + // into a use site such that it shows up as an ordinary + // variable reference, but this logic currently only + // applies if there are no `store`s or other operations + // with possible side effects between the `load` and + // the place where it gets used. + // + // An alternative implementation of this pass might + // `load` each of our temporaries once, at the top of + // `block`, and then use that same value at all use sites. + // + auto user = use->getUser(); + m_builder.setInsertBefore(user); + auto newVal = m_builder.emitLoad(temp); + use->set(newVal); + } + + // Once we've replaced all its uses, there is no need + // to keep `param` around. + // + param->removeAndDeallocate(); + } + } + + // Now it is time to get back to `emitAssignmentsInPredecessor()`. + // + // As discussed in `replaceParamUsesWithTemps()`, we want to avoid + // emitting high-level-language output code with unnecessary copies. + // That goal makes the process of emitting assignments to our + // temporarites in the predecessors of `block` more challenging. + // + // To understand the challenge, consider a block like: + // + // block B(x,y): + // ... + // + // and a branch to that block of the form: + // + // br B(y, x); + // + // The phi operations here are effectively swapping `x` and `y`, + // so we know that output code will need at least *one* + // intermediate copy to do the job. + // + // We don't want to see output like: + // + // // br B(y,x); + // x = y; + // y = x; + // + // but we also don't want to see more copies then necessary: + // + // // br B(y,x); + // auto tmp_x = x; + // auto tmp_y = y; + // x = tmp_y; + // y = tmp_x; + // + // Our goal is emit a strictly *minimal* number of copies. + // + // In order to solve the problem, we need to track some information + // on a per-branch-site basis. The most obvious of this is information + // about each argument that the branch pasess to a corresponding + // block parameter. + // + struct ArgInfo + { + // We track the original argument value that was passed at the branch. + // + IRInst* originalVal = nullptr; + + // At a branch site, we can consider that the goal is to assign + // each argument (`ArgInfo`) to the temporary for a corresponding + // parameter (`ParamInfo`). + // + // The problematic cases arise when an argument value is itself + // a reference to a block parameter (e.g., in the `(y,x) -> (x,y)` + // case). We thus track whether or not `originalVal` above is + // itself a block parameter and, if so, what the index of that + // parameter is. + // + Index paramIndex = kInvalidIndex; + + // When there is a cyclic dependency between arguments at + // a branch site, no sequence of plain assignments (without + // additional copies) will suffice. + // + // When we end up having to break cycles, we do so by loading + // a copy of the value in one of the per-parameter temporaries. + // Any subsequent branch arguments that referenced the parameter + // will need to use that copy instead. + // + // In order to make sure that we properly reference the loaded + // copy instead of the original argument in such cases, we use + // a pointer field for each argument that points to a location + // where the up-to-date value can be found. + // + // For most arguments this will always just point to `originalVal`, + // but for arguments that refer to a block parameter, this will + // point to the `currentVal` field of the corresponding `ParamInfo`. + // + IRInst** currentValPtr = nullptr; + }; + enum { kInvalidIndex = -1 }; + + // A lot of the logic in this pass is concerned with the process of + // emitting *assignments* from branch arguments to block parameters. + // Those assignments are implicit in our SSA IR, but this pass needs + // to make them explicit. + // + // In order to emit assignments in an order that minimizes the number + // of temporaries/copies that appear in output code, we need a way + // to track which assignments have been done, which are ready to be done, + // and which are "blocked" for some reason. We thus associate each + // assignment with an integer _state_ which is in one of three cases: + // + // * _done_ (`-1`): any instructions needed for the assignment have been emitted + // + // * _ready_ (`0`): the assignment can be emitted without causing any problems + // + // * _blocked_ (`N > 0`): the assignment cannot be emitted yet, because there + // are `N` other not-yet-done assignments that need to read the value of the + // parameter that this assignment wants to write. The reads need to be able + // to proceed before the write can go through. + // + enum + { + kState_Done = -1, + kState_Ready = 0, + }; + + // There is a one-to-one correspondance between: + // + // * The phis/parameters of a particular `block` + // * The arguments passed at some branch to `block` + // * The assignments that need to be performed at that branch site + // + // We thus use a single structure to track all of that information, + // which is handy but also requires careful thought at use sites + // about which version of the information is relevant. + // + struct PhiInfo + { + ParamInfo param; + ArgInfo arg; + Count state = kState_Ready; + }; + List<PhiInfo> phiInfos; + Count getParamCount() { return phiInfos.getCount(); } + + void initializeAssignmentInfo(IRUnconditionalBranch* branch, Index assignmentIndex) + { + // Each assignment is a request to write the value of + // some `srcArg` to some `dstParam`. + // + auto& assignment = phiInfos[assignmentIndex]; + auto& srcArg = assignment.arg; + auto& dstParam = assignment.param; + + // The actual argument values can be read off of + // the branch instruction. + // + auto srcArgVal = branch->getArg(assignmentIndex); + srcArg.originalVal = srcArgVal; + srcArg.currentValPtr = &srcArg.originalVal; + + // The parameters have largely been initialized in the + // per-block logic, but we do need to (re-)initialize + // the `currentVal` field to get it ready for a new + // sequence of assignments. + // + dstParam.currentVal = dstParam.param; + + // The main challenges arise when the argument value + // for an assignment is itself one of the parameters + // of the destination block. + // + // We can check if `srcArgVal` is a parameter using + // the map we pre-computed. + // + Index srcParamIndex = kInvalidIndex; + mapParamToIndex.TryGetValue(srcArgVal, srcParamIndex); + srcArg.paramIndex = srcParamIndex; + + if (srcParamIndex != kInvalidIndex) + { + // In the case where the source *is* a parameter, + // we may not be able to use the source parameter value + // directly for this assignment, since we might + // need to make a scratch copy of it. + // + // To be able to keep up with such changes if they + // occur, we will update `currentValPtr` to point + // to the `currentVal` of the source parameter. + // + auto& srcParamInfo = phiInfos[srcParamIndex].param; + srcArg.currentValPtr = &srcParamInfo.currentVal; + } + + // One very special case is when the source value to be assigned + // to a destination phi/parameter is the exact same phi/parameter. + // In such a case there is nothing that needs to be done, and we + // can consider the assignment fully handled. + // + if (srcParamIndex == assignmentIndex) + { + assignment.state = kState_Done; + } + else + { + // Otherwise we start out assuming that the assignment is ready + // to proceed, and then check to see whether it should be + // blocked as part of the next loop. + // + assignment.state = kState_Ready; + } + } + + void checkIfAssignmentBlocksAnother(Index assignmentIndex) + { + // We can skip any case where this assignment has already been + // performed/resolved, since it cannot lead to anything being + // blocked waiting on it. + // + auto& assignment = phiInfos[assignmentIndex]; + if (assignment.state == kState_Done) + { + return; + } + + // We only care about cases where the source/argument for this + // assignment is another parameter. + // + auto& srcArg = assignment.arg; + auto srcParamIndex = srcArg.paramIndex; + if (srcParamIndex == kInvalidIndex) + { + return; + } + + // Note that the sticky case of a parameter that refers to itself + // was already detected and handled in the previous loop. + // + SLANG_ASSERT(srcParamIndex != assignmentIndex); + // + // In fact, it is possible that the assignment for `srcParamIndex` + // has already been completed, since it was one of the self-referential + // cases. If that's true and the assignment is already marked as + // done, there is no reason to try and block it. + // + auto& srcParamAssignment = phiInfos[srcParamIndex]; + if (srcParamAssignment.state == kState_Done) + { + return; + } + + // Otherwise we are in exactly the case we are looking for. + // This `assignment` is of the form: + // + // temps[...] = temps[srcParamIndex]; + // + // and there is another assignment of the form: + // + // temps[srcParamIndex] = ...; + // + // That we cannot allow to proceed until after *this* assignment + // has been allowed to read from the temporary for `srcParamIndex`. + // + // The representation of both the _ready_ and _blocked_ states + // is equal to the number of "blockers," so in either case we can + // increment the `state` in order to add in a(nother) blocker. + // + srcParamAssignment.state++; + } + + void emitAssignmentsInPredecessor(IRBlock* pred) + { + // Given the way our IR is structured, we have an invariant that the + // predecessor block *must* end with an unconditional branch (as they + // are the only kinds of branches allowed to carry arguments). + // + auto branch = cast<IRUnconditionalBranch>(pred->getTerminator()); + SLANG_ASSERT(branch); + + // The predecessor block must pass the expected number of arguments + // to `block`, or the IR has been invalidated in some previous pass. + // + auto paramCount = getParamCount(); + Count argCount = branch->getArgCount(); + SLANG_ASSERT(argCount == paramCount); + + // We are going to emit a sequence of assignments that write the + // arguments of `branch` to the temporaries for the parameters of + // `block`. All of these assignments have to be the last thing we + // do before the branch. + // + m_builder.setInsertBefore(branch); + + // Our first order of business is to initialize the per-branch-site + // and per-argument/-assignment information, so that it is correct + // for `branch`. + // + for (Index assignmentIndex = 0; assignmentIndex < argCount; ++assignmentIndex) + { + initializeAssignmentInfo(branch, assignmentIndex); + } + + // We can now scan through our assignments and try to determine + // which of them are _blocked_. + // + // A assignment of `param_i <- arg_i` is blocked if there + // exists some other not-yet-done assignment of the form + // `param_j <- param_i`. That is, an assignment is blocked + // when the parameter it wants to write to is being used as + // the source/argument for some other assignment (that has not + // yet been completed). + // + // We can thus loop over the assignments and for each one see + // if it is in the form `param_j <- param_i` for some `i` and, + // if so, tell the assignment for `param_i <- ....` that it is + // blocked. + // + for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex) + { + checkIfAssignmentBlocksAnother(assignmentIndex); + } + + // Once we have identified the cases where one assignment is + // blocked on another, we can scan through the list of assignments + // and try to perform any assignmenst that are in the _ready_ state, + // as well as any additional assignments that become _ready_ as a result. + // + for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex) + { + tryPerformParamAssignment(assignmentIndex); + } + + // The only assignments that could not be completed in the previous + // loop would be those that are part of a dependency cycle. + // We make one more loop over the assignments, and this time we will + // ensure that the assignment gets to the _done_ state, even if doing + // so requires loading a copy of one of our temporaries. + // + for (Index assignmentIndex = 0; assignmentIndex < paramCount; ++assignmentIndex) + { + completeAssignmentUsingCopyIfNeeded(assignmentIndex); + } + + // Once we are sure all the assignment operations have been performed, + // we can set about replacing the unconditional branch itself. + // + replaceBranch(branch); + } + + // Replacing the branch instruction at the end of a predecessor block + // is relatively simple, and just a bit of busy-work. + // + void replaceBranch(IRUnconditionalBranch* oldBranch) + { + // When creating a replacement instruction here, we need to make sure + // that we keep all the operands that weren't phi arguments. + // + Count oldOperandCount = oldBranch->getOperandCount(); + Count paramCount = getParamCount(); + Count newOperandCount = oldOperandCount - paramCount; + + // There are currently two different opcodes that map to unconditional + // branches, with different numbers of operands before the phi-related + // arguments: + // + // unconditionalBranch(TargetBlock, arg0, arg1, arg2, ...); + // loop(TargetBlock, BreakBlock, ContinueBlock, arg0, arg1, arg2, ...); + // + // In either case, there is a constant bound on the number of non-phi + // operands. + // + static const Count kMaxNewOperandCount = 3; + SLANG_ASSERT(newOperandCount <= kMaxNewOperandCount); + + IRInst* newOperands[kMaxNewOperandCount] = {}; + for (Index i = 0; i < newOperandCount; ++i) + { + newOperands[i] = oldBranch->getOperand(i); + } + + auto newBranch = m_builder.emitIntrinsicInst( + oldBranch->getFullType(), + oldBranch->getOp(), + newOperandCount, + newOperands); + oldBranch->transferDecorationsTo(newBranch); + + // TODO: We could consider just modifying `branch` in-place by clearing + // the relevant operands for the phi arguments and setting its operand + // count to a lower value. + // + oldBranch->removeAndDeallocate(); + } + + // The most subtle bit of logic, which relies on the data structures + // we have built so far, is the way we attempt to perform assignments + // that have become ready. + // + void tryPerformParamAssignment(Index firstAssignmentIndex) + { + // Performing one assignment may lead to another being unblocked, + // and entering the _ready_ state, in which case we wnat to perform + // *that* assignment, which may unblock another, etc. + // + // We thus use a loop and keep processing assignments as they + // become unblocked, until we run out of work. + // + Index assignmentIndex = firstAssignmentIndex; + for (;;) + { + auto& assignment = phiInfos[assignmentIndex]; + if (assignment.state != kState_Ready) + { + // if the assignment we are looking at isn't ready to + // perform, we have reached the end of a chain of + // unblocked depdendencies (perhaps even a chain of zero). + // + return; + } + + // When we have an assignment that is ready to perform, + // we do so by storing the value of the corresponding + // argument into the temporary for the coresponding + // parameter. + // + // Note that we use `actualValPtr` here instead of `originalVal`, + // so that any logic that might have moved another parameter + // into a temporary will influence our result. + // + auto& dstParam = assignment.param; + auto& srcArg = assignment.arg; + m_builder.emitStore(dstParam.temp, *srcArg.currentValPtr); + // + // Once the store is emitted, the assignment has been performed, + // and it can move to the _done_ state. + // + assignment.state = kState_Done; + + // If the source of this assignment as itself a block + // parameter, then we may need to unblock the assignment + // for that parameter. + // + // If the source *isn't* a parameter, then there is nothing + // to unblock, and we've reached the end of a chain. + // + auto srcParamIndex = srcArg.paramIndex; + if (srcParamIndex == kInvalidIndex) + { + return; + } + + // If the source *is* a parameter, but its assignment + // has already been performed, then we cannot unblock it + // (we certainly don't want to perform it again). + // + auto& srcParamAssignment = phiInfos[srcParamIndex]; + if (srcParamAssignment.state == kState_Done) + { + return; + } + + // If the source parameter's assignment hasn't been + // done yet, then we expect that it *must* be blocked + // (at the very least blocked on the assignment we + // have just performed). + // + SLANG_ASSERT(srcParamAssignment.state != kState_Ready); + + // We remove one blocker from the source. + // + srcParamAssignment.state--; + + // It is possible that removing this one blocker has + // moved the source parameter into the _ready_ state. + // Rather than check that here, we can simply move + // back to the top of this loop and consider the + // assignment corresponding to the source parameter + // as the next in our chain. + // + assignmentIndex = srcParamIndex; + } + } + + // When we want to make sure that all our assignments *definitely* + // get completed, we need to be willing to make a temporary copy + // of a branch parameter in order to unblock an assignment. + // + void completeAssignmentUsingCopyIfNeeded(Index assignmentIndex) + { + // If the assignemtn is _blocked_ because of one or more + // other assignments, we will unblock it by making a copy. + // + auto& assignment = phiInfos[assignmentIndex]; + if(assignment.state > 0) + { + auto& dstParam = assignment.param; + + // The assignment to `dstParam` is blocked because there + // exist one or more other not-yet-completed assignments + // where `dstParam` is being used as a source. + // + // We emit a `load` from the temporary for `dstParam` in + // order to make a copy, at which point we can safely + // perform the assignment for to the original, and allow + // the not-yet-completed assignments to use that copy + // instead, as the current value for `dstParam`. + // + dstParam.currentVal = m_builder.emitLoad(dstParam.temp); + assignment.state = kState_Ready; + } + + // If this assignment *was* blocked, we have made it so + // that it isn't blocked any more. We thus expect that + // trying to perform the assignment again will definitely + // result it the assignment being in the _done_ state. + // + tryPerformParamAssignment(assignmentIndex); + SLANG_ASSERT(assignment.state == kState_Done); + } +}; + +void eliminatePhis(CodeGenContext* codeGenContext, IRModule* module) +{ + PhiEliminationContext context(codeGenContext, module); + context.eliminatePhisInModule(); +} + +} diff --git a/source/slang/slang-ir-eliminate-phis.h b/source/slang/slang-ir-eliminate-phis.h new file mode 100644 index 000000000..58690c012 --- /dev/null +++ b/source/slang/slang-ir-eliminate-phis.h @@ -0,0 +1,16 @@ +// slang-ir-eliminate-phis.h +#pragma once + +namespace Slang +{ + struct CodeGenContext; + struct IRModule; + + /// Eliminate all "phi nodes" from the given `module`. + /// + /// This process moves the code in `module` *out* of SSA form, + /// so that it is more suitable for emission on targets that + /// are not themselves based on an SSA representation. + /// + void eliminatePhis(CodeGenContext* context, IRModule* module); +} diff --git a/source/slang/slang-ir-restructure-scoping.cpp b/source/slang/slang-ir-restructure-scoping.cpp index 0535483ec..dca1a92b6 100644 --- a/source/slang/slang-ir-restructure-scoping.cpp +++ b/source/slang/slang-ir-restructure-scoping.cpp @@ -307,24 +307,39 @@ static void fixValueScopingForInst( // If we've gotten this far, we know that `u` is a "bad" // use of `def`, and needs fixing. // - // We will create the `tmp` variable on demand, so - // that we create it when the first "bad" use is encountered, - // and then re-use it for subsequent bad uses. + // We will use a temporary variable to resolve the bad scoping, + // creating it on-demand when we ecounter a first "bad" use, and + // then re-using that temporary for any subsequent bad uses. // if( !tmp ) { - // We will create a temporary to represent `def`, - // and insert a `store` into it right after `def`. + // If the value is *already* a temporary variable, then + // we are really just trying to fix the scoping of the + // variable declaration itself, and the variable can + // effectively be its own temporary. // - // Note: we are inserting the new variable right - // after `def` for now, just because we don't - // yet know the final region that it should be - // placed into. We will move it to the correct - // location when we are done. - // - builder.setInsertBefore(def->getNextInst()); - tmp = builder.emitVar(def->getDataType()); - builder.emitStore(tmp, def); + if(auto varDef = as<IRVar>(def)) + { + tmp = varDef; + } + else + { + // We will create a temporary to represent `def`, + // and insert a `store` into it right after `def`. + // + // Note: we are inserting the new variable right + // after `def` for now, just because we don't + // yet know the final region that it should be + // placed into. We will move it to the correct + // location when we are done. + // + builder.setInsertBefore(def->getNextInst()); + tmp = builder.emitVar(def->getDataType()); + builder.emitStore(tmp, def); + // + // Note: the lifetime for the new variable starts + // right after the store we have emitted. + } } // In order to know where `tmp` should be defined @@ -345,19 +360,40 @@ static void fixValueScopingForInst( rr); } - // To fix up the use `u`, we will need to change - // it from using `def` to using a load from `tmp` - // - builder.setInsertBefore(user); - IRInst* tmpVal = builder.emitLoad(tmp); - - // We are clobbering the value used by the `IRUse` `u`, - // while will cut it out of the list of uses for `def`. - // We need to be careful when doing this to not disrupt - // our iteration of the uses of `def`, so we carefully - // used the `nextUse` temporary at the start of the loop. + // We need to fix up the use `u`, but the way we fix + // it depends on whether we moving `def` itself (in which + // case `tmp` and `def` are the same), or if we have + // introduced an intermediate temporary. // - u->set(tmpVal); + if (def == tmp) + { + // If we are moving the definition itself, we don't + // need to do any kind of fix-up work at use sites. + } + else + { + // Othwerise we need to fix up the use `use` so + // that it uses a value loaded from `tmp` instead + // of `def`. + // + builder.setInsertBefore(user); + IRInst* tmpVal = builder.emitLoad(tmp); + + // We are clobbering the value used by the `IRUse` `u`, + // while will cut it out of the list of uses for `def`. + // We need to be careful when doing this to not disrupt + // our iteration of the uses of `def`, so we carefully + // used the `nextUse` temporary at the start of the loop. + // + // Note(tfoley): This is more subtle than the comment makes + // it out to be, because we are *also* injecting a new use + // of `def` in the logic that creates `tmp` (because `def` + // is used as an operand to the `store` that initializes + // `tmp`). We really ought to work on a copy of the use-def + // information. + // + u->set(tmpVal); + } } // At the end of the loop, the `tmp` variable will have @@ -421,8 +457,27 @@ void fixValueScoping(RegionTree* regionTree) if(!parentRegion) continue; - for(auto inst : block->getDecorationsAndChildren()) + // Note: This pass will end up modifying the IR while also + // iterating over it. As such, we need to be careful not + // to let our iteration logic get confused. + // + // In particular, it is possible that `inst` will get moved + // to another block, as a way to resolve scoping issues, and + // if we did not account for that result, we might end up + // walking to the next instruction after `inst` even though + // it isn't inside `block`. + // + // We defensively cache the next instruction to visit so that + // we can continue our iteration after `inst` even if it gets + // moved. For now we are confident that the operations on + // `inst` won't affect `nextInst`, since the pass is not supposed + // to move or delete any *other* instructions. + // + IRInst* nextInst = nullptr; + for (auto inst = block->getFirstOrdinaryInst(); inst; inst = nextInst) { + nextInst = inst->getNextInst(); + fixValueScopingForInst(inst, parentRegion, regionTree); } } |
