diff options
Diffstat (limited to 'source')
| -rw-r--r-- | source/slang/emit.cpp | 105 | ||||
| -rw-r--r-- | source/slang/ir-dce.cpp | 325 | ||||
| -rw-r--r-- | source/slang/ir-dce.h | 19 | ||||
| -rw-r--r-- | source/slang/ir-inst-defs.h | 7 | ||||
| -rw-r--r-- | source/slang/ir-insts.h | 23 | ||||
| -rw-r--r-- | source/slang/ir-legalize-types.cpp | 2 | ||||
| -rw-r--r-- | source/slang/ir-specialize-resources.cpp | 1087 | ||||
| -rw-r--r-- | source/slang/ir-specialize-resources.h | 24 | ||||
| -rw-r--r-- | source/slang/ir.cpp | 79 | ||||
| -rw-r--r-- | source/slang/ir.h | 6 | ||||
| -rw-r--r-- | source/slang/lower-to-ir.cpp | 6 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj | 6 | ||||
| -rw-r--r-- | source/slang/slang.vcxproj.filters | 18 | ||||
| -rw-r--r-- | source/slang/syntax.cpp | 2 | ||||
| -rw-r--r-- | source/slang/syntax.h | 8 |
15 files changed, 1683 insertions, 34 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp index 39616b3df..f8b14b0f3 100644 --- a/source/slang/emit.cpp +++ b/source/slang/emit.cpp @@ -2,9 +2,11 @@ #include "emit.h" #include "../core/slang-writer.h" +#include "ir-dce.h" #include "ir-insts.h" #include "ir-restructure.h" #include "ir-restructure-scoping.h" +#include "ir-specialize-resources.h" #include "ir-ssa.h" #include "ir-validate.h" #include "legalize-types.h" @@ -5603,21 +5605,56 @@ struct EmitVisitor emit("}\n"); } + /// Emit the array brackets that go on the end of a declaration of the given type. void emitArrayBrackets( EmitContext* ctx, - IRType* type) + IRType* inType) { SLANG_UNUSED(ctx); - if(auto arrayType = as<IRArrayType>(type)) - { - emit("["); - EmitVal(arrayType->getElementCount(), kEOp_General); - emit("]"); - } - else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type)) + // A declaration may require zero, one, or + // more array brackets. When writing out array + // brackets from left to right, they represent + // the structure of the type from the "outside" + // in (that is, if we have a 5-element array of + // 3-element arrays we should output `[5][3]`), + // because of C-style declarator rules. + // + // This conveniently means that we can print + // out all the array brackets with a looping + // rather than a recursive structure. + // + // We will peel the input type like an onion, + // looking at one layer at a time until we + // reach a non-array type in the middle. + // + IRType* type = inType; + for(;;) { - emit("[]"); + if(auto arrayType = as<IRArrayType>(type)) + { + emit("["); + EmitVal(arrayType->getElementCount(), kEOp_General); + emit("]"); + + // Continue looping on the next layer in. + // + type = arrayType->getElementType(); + } + else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type)) + { + emit("[]"); + + // Continue looping on the next layer in. 
+ // + type = unsizedArrayType->getElementType(); + } + else + { + // This layer wasn't an array, so we are done. + // + return; + } } } @@ -5752,16 +5789,6 @@ struct EmitVisitor emit(";\n"); } - IRType* unwrapArray(IRType* type) - { - IRType* t = type; - while( auto arrayType = as<IRArrayTypeBase>(t) ) - { - t = arrayType->getElementType(); - } - return t; - } - void emitIRStructuredBuffer_GLSL( EmitContext* ctx, IRGlobalParam* varDecl, @@ -6546,6 +6573,46 @@ String emitEntryPoint( #endif validateIRModuleIfEnabled(compileRequest, irModule); + // After type legalization and subsequent SSA cleanup we expect + // that any resource types passed to functions are exposed + // as their own top-level parameters (which might have + // resource or array-of-...-resource types). + // + // Many of our targets place restrictions on how certain + // resource types can be used, so that having them as + // function parameters is invalid. To clean this up, + // we will try to specialize called functions based + // on the actual resources that are being passed to them + // at specific call sites. + // + // Because the legalization may depend on what target + // we are compiling for (certain things might be okay + // for D3D targets that are not okay for Vulkan), we + // pass down the target request along with the IR. + // + specializeResourceParameters(compileRequest, targetRequest, irModule); + +#if 0 + dumpIRIfEnabled(compileRequest, irModule, "AFTER RESOURCE SPECIALIZATION"); +#endif + validateIRModuleIfEnabled(compileRequest, irModule); + + // The resource-based specialization pass above + // may create specialized versions of functions, but + // it does not try to completely eliminate the original + // functions, so there might still be invalid code in + // our IR module. + // + // To clean up the code, we will apply a fairly general + // dead-code-elimination (DCE) pass that only retains + // whatever code is "live." 
+ // + eliminateDeadCode(compileRequest, irModule); +#if 0 + dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE"); +#endif + validateIRModuleIfEnabled(compileRequest, irModule); + // After all of the required optimization and legalization // passes have been performed, we can emit target code from // the IR module. diff --git a/source/slang/ir-dce.cpp b/source/slang/ir-dce.cpp new file mode 100644 index 000000000..0f037bfe5 --- /dev/null +++ b/source/slang/ir-dce.cpp @@ -0,0 +1,325 @@ +// ir-dce.cpp +#include "ir-dce.h" + +#include "ir.h" +#include "ir-insts.h" + +namespace Slang +{ + +struct DeadCodeEliminationContext +{ + // This type implements a simple global DCE pass over + // an entire module. + // + // We start with member variables to stand in for + // the parameters that were passed to the top-level + // `eliminateDeadCode` function. + // + CompileRequest* compileRequest; + IRModule* module; + + // Our overall process is going to be to determine + // which instructions in the module are "live" + // and then eliminate anything that wasn't found to + // be live. + // + // We will track the liveness state by keeping + // a set of all instructions we have so far determined + // to be live. + // + HashSet<IRInst*> liveInsts; + + // Querying whether an instruction has been + // determined to be live is easy. + // + bool isInstLive(IRInst* inst) + { + // The only wrinkle is that we want to safeguard + // against a null instruction (there are some + // corner cases where we still construct IR + // instructions with a null type). + // + if(!inst) return false; + + return liveInsts.Contains(inst); + } + + // We are going to do an iterative analysis + // where we mark instructions we know are + // live, and then see if that can help us + // identify any other instructions that + // must also be live. 
+ // + // For this, we will use a work list of + // instructions that have been marked + // as live, but for which we haven't + // looked at their impact on other + // instructions. + // + List<IRInst*> workList; + + // When we discover that an instruction seems + // to be live, we will add it to our set, + // and also the work list, but only if we + // haven't done so previously. + // + void markInstAsLive(IRInst* inst) + { + // Again, we safeguard against null instructions + // just in case. + // + if(!inst) return; + + if(liveInsts.Contains(inst)) + return; + liveInsts.Add(inst); + workList.Add(inst); + } + + // Given the basic infrastructure above, let's + // dive into the task of actually finding all + // the live code in a module. + // + void processModule() + { + // First of all, we know that the root module instruction + // should be considered as live, because otherwise + // we'd end up eliminating it, so that is a + // good place to start. + // + markInstAsLive(module->getModuleInst()); + + // Marking the module as live should have + // seeded our work list, so we can now start + // processing entries off of our work list + // until it goes dry. + // + while( workList.Count() ) + { + auto inst = workList.Last(); + workList.RemoveLast(); + + // At this point we know that `inst` is live, + // and we want to start considering which other + // instructions must be live because of that + // knowledge. + // + // A first easy case is that the parent (if any) + // of a live instruction had better be live, or + // else we might delete the parent, and + // the child with it. + // + markInstAsLive(inst->getParent()); + + // Next the type of a live instruction, and all + // of its operands must also be live, or else + // we won't be able to compute its value. 
+ // + markInstAsLive(inst->getFullType()); + UInt operandCount = inst->getOperandCount(); + for( UInt ii = 0; ii < operandCount; ++ii ) + { + markInstAsLive(inst->getOperand(ii)); + } + + // Finally, we need to consider the children + // and decorations of the instruction. + // + // Note that just because an instruction is + // live doesn't mean its children must be, or + // else we'd never eliminate *anything* (we + // marked the whole module as live, and everything + // is a transitive child of the module). + // + // Decorations, in contrast, are always live if their + // parents are (because we don't want to silently drop + // decorations). It is still important to *mark* + // decorations as live, because they have operands, + // and those operands need to be marked as live. + // We will fold decorations into the same loop + // as children for simplicity. + // + // To keep the code here simple, we'll defer the + // decision of whether a child (or decoration) + // should be live when its parent is to a subroutine. + // + for( auto child : inst->getDecorationsAndChildren() ) + { + if(shouldInstBeLiveIfParentIsLive(child)) + { + // In this case, we know `inst` is live and + // its `child` should be live if its parent is, + // so the `child` must be live too. + // + markInstAsLive(child); + } + } + } + + // If our work list runs dry, that means we've reached a steady + // state where everything that is transitively relevant to + // the "outputs" of the module has been marked as live. + // + // Now we can simply walk through all of our instructions + // recursively and eliminate those that are "dead" by + // virtue of not having been found live. + // + eliminateDeadInstsRec(module->getModuleInst()); + } + + void eliminateDeadInstsRec(IRInst* inst) + { + // Given the instruction `inst` we need to eliminate + // any dead code at, or under it. + // + // The easy case is if `inst` is dead (that is, not live). 
+ // + if( !isInstLive(inst) ) + { + // We can simply remove and deallocate `inst` because it is + // dead, and not worry about any of its descendents, + // because they must have been dead too (since we always + // mark the parent of a live instruction as live). + // + inst->removeAndDeallocate(); + } + else + { + // If `inst` is live, then we need to deal with the possibility + // that its children/decorations (or descendents in general) + // might still be dead. + // + // The biggest wrinkle is that we walk the linked list of + // children/decorations a bit carefully, using a temporary + // to hold the next node, in case we eliminate one of + // the children as we go. + // + IRInst* next = nullptr; + for( IRInst* child = inst->getFirstDecorationOrChild(); child; child = next ) + { + next = child->getNextInst(); + eliminateDeadInstsRec(child); + } + } + } + + // Now we come to the decision procedure we put off before: + // should a given `inst` be live if its parent is? + // + bool shouldInstBeLiveIfParentIsLive(IRInst* inst) + { + // The main source of confusion/complexity here is that + // we are using the same routine to decide: + // + // * Should some ordinary instruction in a basic block be kept around? + // * Should a basic block in some function be kept around? + // * Should a function/type/variable in a module be kept around? + // + // Still, there are a few basic patterns we can observe. + // First, if `inst` is an instruction that might have some effects + // when it is executed, then we should keep it around. + // + if(inst->mightHaveSideEffects()) + return true; + // + // The `mightHaveSideEffects` query is conservative, and will + // return `true` as its default mode, so once we are past that + // query we know that `inst` is either something "structural" + // (that makes up the program) rather than executable, or it + // is executable but was on a white list of things that are + // safe to eliminate. 
+ + // Most top-level objects (functions, types, etc.) obviously + // do *not* have side effects. That creates the risk that + // we'll just go ahead and eliminate every single function/type + // in a module. There needs to be a way to identify the + // functions we want to keep around, and for right now + // that is handled with the `[entryPoint]` decoration. + // + if(inst->findDecorationImpl(kIROp_EntryPointDecoration)) + return true; + // + // TODO: Eventually it would make sense to consider everything + // with an `[export(...)]` decoration as live, but our current + // approach to linking for back-end compilation leaves many + // linkage decorations in place that we seemingly don't need/want. + + // A basic block is an interesting case. Knowing that a function + // is live means that its entry block is live, but the liveness + // of any other blocks is determined by whether they are referenced + // by other instructions (e.g., a branch from one block to + // another). + // + if( auto block = as<IRBlock>(inst) ) + { + // To determine whether this is the first block in its + // parent function (or what-have-you) we can simply + // check if there is a previous block before it. + // + auto prevBlock = block->getPrevBlock(); + return prevBlock == nullptr; + } + + // There are a few special cases of "structural" instructions + // that we don't want to eliminate, so we'll check for those next. + // + switch( inst->op ) + { + // Function parameters obviously shouldn't get eliminated, + // even if nothing references them, and block parameters + // (phi nodes) will be considered live when their block is, + // just so that we don't have to deal with any complications + // around re-writing the relevant inter-block argument passing. + // + // TODO: A smarter DCE pass could deal with this case more + // carefully, or we could improve the interprocedural SCCP + // pass to deal with block parameters instead. 
+ // + case kIROp_Param: + return true; + + // IR struct types and witness tables are currently kludged + // so that they have child instructions that represent their + // entries (effectively `(key,value)` pairs), and those child + // instructions are never directly referenced (e.g., an access + // to a struct field references the *key* but not the `(key,value)` + // pair that is the `IRField` instruction). + // + // TODO: at some point the IR should use a different representation + // for struct types and witness tables that does away with + // this problem. + // + case kIROp_StructField: + case kIROp_WitnessTableEntry: + return true; + + default: + break; + } + + // If none of the explicit cases above matched, then we will consider + // the instruction to not be live just because its parent is. Further + // analysis could still lead to a change in the status of `inst`, if + // an instruction that uses it as an operand is marked live. + // + return false; + } +}; + +// The top-level function for invoking the DCE pass +// is straightforward. We set up the context object +// and then defer to it for the real work. +// +void eliminateDeadCode( + CompileRequest* compileRequest, + IRModule* module) +{ + DeadCodeEliminationContext context; + context.compileRequest = compileRequest; + context.module = module; + + context.processModule(); +} + +} diff --git a/source/slang/ir-dce.h b/source/slang/ir-dce.h new file mode 100644 index 000000000..fd56616d9 --- /dev/null +++ b/source/slang/ir-dce.h @@ -0,0 +1,19 @@ +// ir-dce.h +#pragma once + +namespace Slang +{ + class CompileRequest; + struct IRModule; + + /// Eliminate "dead" code from the given IR module. + /// + /// This pass is primarily designed for flow-insensitive + /// "global" dead code elimination (DCE), such as removing + /// types that are unused, functions that are never called, + /// etc. 
+ /// + void eliminateDeadCode( + CompileRequest* compileRequest, + IRModule* module); +} diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h index 69940a79d..ee390b97b 100644 --- a/source/slang/ir-inst-defs.h +++ b/source/slang/ir-inst-defs.h @@ -382,6 +382,13 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0) INST(GloballyCoherentDecoration, globallyCoherent, 0, 0) INST(PatchConstantFuncDecoration, patchConstantFunc, 1, 0) + /// An `[entryPoint]` decoration marks a function that represents a shader entry point. + INST(EntryPointDecoration, entryPoint, 0, 0) + + /// A `[dependsOn(x)]` decoration indicates that the parent instruction depends on `x` + /// even if it does not otherwise reference it. + INST(DependsOnDecoration, dependsOn, 1, 0) + /* LinkageDecoration */ INST(ImportDecoration, import, 1, 0) INST(ExportDecoration, export, 1, 0) diff --git a/source/slang/ir-insts.h b/source/slang/ir-insts.h index 737675d87..26d5bcf05 100644 --- a/source/slang/ir-insts.h +++ b/source/slang/ir-insts.h @@ -297,7 +297,7 @@ struct IRLookupWitnessTable : IRInst struct IRCall : IRInst { - IRUse func; + IR_LEAF_ISA(Call) IRInst* getCallee() { return getOperand(0); } @@ -996,6 +996,11 @@ struct IRBuilder IRDecoration* addDecoration(IRInst* value, IROp op, IRInst* const* operands, Int operandCount); + IRDecoration* addDecoration(IRInst* value, IROp op) + { + return addDecoration(value, op, (IRInst* const*) nullptr, 0); + } + IRDecoration* addDecoration(IRInst* value, IROp op, IRInst* operand) { return addDecoration(value, op, &operand, 1); @@ -1087,6 +1092,22 @@ struct IRBuilder { addDecoration(value, kIROp_ExportDecoration, getStringValue(mangledName)); } + + void addEntryPointDecoration(IRInst* value) + { + addDecoration(value, kIROp_EntryPointDecoration); + } + + /// Add a decoration that indicates that the given `inst` depends on the given `dependency`. 
+ /// + /// This decoration can be used to ensure that a value that an instruction + /// implicitly depends on cannot be eliminated so long as the instruction + /// itself is kept alive. + /// + void addDependsOnDecoration(IRInst* inst, IRInst* dependency) + { + addDecoration(inst, kIROp_DependsOnDecoration, dependency); + } }; // Helper to establish the source location that will be used diff --git a/source/slang/ir-legalize-types.cpp b/source/slang/ir-legalize-types.cpp index a97cc0393..ec9baed32 100644 --- a/source/slang/ir-legalize-types.cpp +++ b/source/slang/ir-legalize-types.cpp @@ -219,7 +219,7 @@ static LegalVal legalizeCall( return LegalVal::simple(context->builder->emitCallInst( callInst->getFullType(), - callInst->func.get(), + callInst->getCallee(), instArgs.Count(), instArgs.Buffer())); } diff --git a/source/slang/ir-specialize-resources.cpp b/source/slang/ir-specialize-resources.cpp new file mode 100644 index 000000000..e6d4351f2 --- /dev/null +++ b/source/slang/ir-specialize-resources.cpp @@ -0,0 +1,1087 @@ +// ir-specialize-resources.cpp +#include "ir-specialize-resources.h" + +#include "ir.h" +#include "ir-insts.h" + +namespace Slang +{ + +struct ResourceParameterSpecializationContext +{ + // This type implements a pass to specialize functions + // with resource parameters to ensure that they are + // legal for a given target. + // + // We start with member variables to stand in for + // the parameters that were passed to the top-level + // `specializeResourceParameters` function. + // + CompileRequest* compileRequest; + TargetRequest* targetRequest; + IRModule* module; + + // Our general approach will be to think in terms + // of specializing call sites, which amount to + // `IRCall` instructions. We will keep a work list + // of call sites in the program that may be worth + // considering for specialization. 
+ // + List<IRCall*> workList; + + // Because we may need to generate specialized functions + // and generate new calls to those functions, we'll + // need some IR building state to get our work done. + // + SharedIRBuilder sharedBuilderStorage; + IRBuilder builderStorage; + IRBuilder* getBuilder() { return &builderStorage; } + + // With the basic state out of the way, let's walk + // through the overall flow of the pass. + // + void processModule() + { + // We will start by initializing our IR building state. + // + sharedBuilderStorage.module = module; + sharedBuilderStorage.session = module->getSession(); + builderStorage.sharedBuilder = &sharedBuilderStorage; + + // Next we will populate our initial work list by + // recursively finding every single call site in the module. + // + addCallsToWorkListRec(module->getModuleInst()); + + // We will process the work list until it goes dry, + // treating it like a stack of work items. + // + while( workList.Count() ) + { + auto call = workList.Last(); + workList.RemoveLast(); + + // At each call site we first check whether it + // is something we can (and should) specialize, + // and if so, do it. The process of specializing + // a function may introduce new call sites that + // become candidates for specialization, so + // our work list may grow along the way. + // + if( canSpecializeCall(call) ) + { + specializeCall(call); + } + } + } + + // Setting up the work list is a simple recursive procedure. + // + void addCallsToWorkListRec(IRInst* inst) + { + // If we have a call site, then add it to the list. + // + if( auto call = as<IRCall>(inst) ) + { + workList.Add(call); + } + + // Recursively walk through any children, to + // see if we uncover more call sites. + // + for( auto child : inst->getChildren() ) + { + addCallsToWorkListRec(child); + } + } + + // We need a way to decide for a given call site + // whether we can/must specialize it. 
+ // + bool canSpecializeCall(IRCall* call) + { + // We can only specialize calls where the callee + // func can be statically identified, and where + // the callee is a definition (with body) rather + // than a declaration. Otherwise there is no + // way to generate a specialized callee function. + // + auto func = as<IRFunc>(call->getCallee()); + if(!func) + return false; + if(!func->isDefinition()) + return false; + + // With the basic checks out of the way, there are + // two conditions we care about: + // + // 1. Should we specialize? This amounts to whether + // `func` has any parameters that need specialization. + // We will call those "specializable" parameters for + // lack of a better name. + // + // 2. Can we specialize? This amounts to whether the + // arguments in `call` that correspond to those + // specializable parameters are "suitable" for use + // in specialization. + // + // We are going to answer both of these queries in + // a single loop that walks over the parameters of + // `func` as well as the arguments to `call`. + // + // The loop may seem a bit awkward because we are + // doing a parallel iteration over a linked list + // (the parameters of `func`) and an array (the + // arguments of `call`). + // + bool anySpecializableParam = false; + UInt argCounter = 0; + for( auto param : func->getParams() ) + { + UInt argIndex = argCounter++; + SLANG_ASSERT(argIndex < call->getArgCount()); + auto arg = call->getArg(argIndex); + + // If the given parameter doesn't need specialization, + // then we need to keep looking. + // + if(!doesParamNeedSpecialization(param)) + continue; + + // If we have run into a `param` that needs specialization, + // then our first condition is met. + // + anySpecializableParam = true; + + // Now we need to check whether `arg` is actually suitable + // for specialization (our second condition). If not, we + // can bail out immediately because our second condition + // cannot be met. 
+ // + if(!isArgSuitableForSpecialization(arg)) + return false; + } + + // If we exit the loop, then the second condition must have + // been met (all the arguments for specializable parameters + // were suitable for specialization), and the result of the + // query comes down to the first condition. + // + return anySpecializableParam; + } + + // Of course, now we need to back-fill the predicates that + // the above function used to evaluate parameters and arguments. + + bool doesParamNeedSpecialization(IRParam* param) + { + // Whether or not a parameter needs specialization is really + // a function of its type: + // + IRType* type = param->getDataType(); + + // What's more, if a parameter of type `T` would need + // specialization, then it seems clear that a parameter + // of type "array of `T`" would also need specialization. + // We will "unwrap" any outer arrays from the parameter + // type before moving on, since they won't affect + // our decision. + // + type = unwrapArray(type); + + // On all of our (current) targets, a function that + // takes a `ConstantBuffer<T>` parameter requires + // specialization. Surprisingly this includes DXIL + // because dxc apparently does not treat `ConstantBuffer<T>` + // as a first-class type. + // + if(as<IRUniformParameterGroupType>(type)) + return true; + + // For GL/Vulkan targets, we also need to specialize + // any parameters that use structured or byte-addressed + // buffers. + // + if( isKhronosTarget(targetRequest) ) + { + if(as<IRHLSLStructuredBufferTypeBase>(type)) + return true; + if(as<IRByteAddressBufferTypeBase>(type)) + return true; + } + + // For now, we will not treat any other parameters as + // needing specialization, even if they use resource + // types like `Texture2D`, because these are allowed + // as function parameters in both HLSL and GLSL. 
+ // + // TODO: Eventually, if we start generating SPIR-V + // directly rather than through glslang, we will need + // to specialize *all* resource-type parameters + // to follow the restrictions in the spec. + // + // TODO: We may want to perform more aggressive + // specialization in general, especially insofar + // as it could simplify the task of supporting + // functions with resource-type outputs. + + return false; + } + + bool isArgSuitableForSpecialization(IRInst* inArg) + { + // Determining if an argument is suitable for + // specializing a callee function requires + // looking at its (recursive) structure. + // + // Rather than write a recursive procedure + // here, we will be tail-recursive by using + // a simple loop. + // + IRInst* arg = inArg; + for(;;) + { + // The leaf case we care about is when the + // argument at the call site is a global + // shader parameter, because then we can + // specialize a callee to refer to the same + // global parameter directly. + // + if(as<IRGlobalParam>(arg)) return true; + + // As we will see later, we can also + // specialize a call when the argument + // is the result of indexing into an + // array (`base[index]`) *if* the `base` + // of the indexing operation is also + // suitable for specialization. + // + if( arg->op == kIROp_getElement ) + { + auto base = arg->getOperand(0); + + // We will "recurse" on the base of + // the indexing operation by continuing + // our loop with the `base` as our new + // argument. + // + arg = base; + continue; + } + + // By default, we will *not* consider an argument + // suitable for specialization. + // + // TODO: There may be other cases that are worth + // handling here. The current code is based on + // observation of what simple shaders do in + // practice. + // + return false; + } + } + + // Once we've determined that a given call site can/should + // be specialized, we need to perform the actual specialization. + // This is where things are going to get more involved. 
+ // + // There are a few different concerns we need to deal with + // that mean we end up having two different passes that walk + // over the parameters/arguments of the call (in addition to + // the ones we had above for determining if we can/should + // specialize in the first place). + // + // The first of the two passes determines information + // relevant to the call site, comprising both the arguments + // that will be passed to the specialized function as + // well as a "key" to identify the specialized function + // that is required. + // + // The key type is similar to that used for generic specialization + // elsewhere in the IR code. It might be worth pulling this + // notion out somewhere more centralized, but we are dealing + // with the code duplication for now. + // + struct Key + { + // The structure of a specialization key will be a list + // of instructions starting with the function to be specialized, + // and then having one or more entries for each parameter + // that is being specialized to indicate the value to which + // it is being specialized (e.g. the global shader parameter). + // + List<IRInst*> vals; + + // In order to use this type as a `Dictionary` key we + // need it to support equality and hashing, but the + // implementations are straightforward. + // + // TODO: honestly we might consider having `GetHashCode` + // and `operator==` defined for `List<T>`. 
+ + bool operator==(Key const& other) const + { + auto valCount = vals.Count(); + if(valCount != other.vals.Count()) return false; + for( UInt ii = 0; ii < valCount; ++ii ) + { + if(vals[ii] != other.vals[ii]) return false; + } + return true; + } + + int GetHashCode() const + { + auto valCount = vals.Count(); + int hash = Slang::GetHashCode(valCount); + for( UInt ii = 0; ii < valCount; ++ii ) + { + hash = combineHash(hash, Slang::GetHashCode(vals[ii])); + } + return hash; + } + }; + + // As indicated above, the information we collect about a call + // site consists of the key for the specialized function we + // will call, and a list of the arguments that will be passed + // to the call. + // + struct CallSpecializationInfo + { + Key key; + List<IRInst*> newArgs; + }; + + // Once we've collected the information about a call site + // we can use a dictionary to see if we already created + // a specialized version of the callee that matches its + // requirements. + // + Dictionary<Key, IRFunc*> specializedFuncs; + + // If the dictionary didn't have a specialized function + // suitable for a call site, we need a second information-gathering + // pass to decide what the new parameters of the specialized + // functions should be, and what instructions the new function + // must execute in its body to set up the replacements for the + // old parameters. + // + struct FuncSpecializationInfo + { + List<IRParam*> newParams; + List<IRInst*> newBodyInsts; + List<IRInst*> replacementsForOldParameters; + }; + + // Before diving into how the different passes collect + // their information, we will dive into the main + // specialization logic first. + // + void specializeCall(IRCall* oldCall) + { + // We have an existing call site `oldCall` that + // we know can and should be specialized. + // + // That means the callee should be a known function + // definition, or else `canSpecializeCall` didn't + // correctly check the preconditions. 
+ // + auto oldFunc = as<IRFunc>(oldCall->getCallee()); + SLANG_ASSERT(oldFunc); + SLANG_ASSERT(oldFunc->isDefinition()); + + // Our first information-gathering pass will + // compute the key for the specialized function + // we want to call, and the arguments we will + // use for that call. + // + CallSpecializationInfo callInfo; + gatherCallInfo(oldCall, oldFunc, callInfo); + + // Once we have gathered information on the call, + // we can check if we have an existing specialization + // that we generated before (for another call site) + // that is suitable to this call site. + // + IRFunc* newFunc = nullptr; + if( !specializedFuncs.TryGetValue(callInfo.key, newFunc) ) + { + // If we didn't find a pre-existing specialized + // function, then we will go ahead and create one. + // + // We start by gathering the information from the call + // site that is relevant to generating a specialized + // callee function, which we avoided doing earlier + // because it might have been throwaway work. + // + FuncSpecializationInfo funcInfo; + gatherFuncInfo(oldCall, oldFunc, funcInfo); + + // Now we use the gathered information to generate + // a new callee function based on the original + // function and the information we gathered. + // + newFunc = generateSpecializedFunc(oldFunc, funcInfo); + specializedFuncs.Add(callInfo.key, newFunc); + } + + // Once we've either found or generated a specialized function + // we need to generate a call to it, and then use the new + // call as a replacement for the old one. + // + auto newCall = getBuilder()->emitCallInst( + oldCall->getFullType(), + newFunc, + callInfo.newArgs.Count(), + callInfo.newArgs.Buffer()); + + newCall->insertBefore(oldCall); + oldCall->replaceUsesWith(newCall); + oldCall->removeAndDeallocate(); + } + + // Before diving into the details on how we gather information + // and specialize callees, let's stop to think about what we'd + // like to do in terms of individual parameters and arguments. 
+ // + // Suppose we are specializing both a call site C and the callee + // function F, and we are considering a particular pair of + // a parameter P of F, and an argument A at the call site. + // + // The full extent of information we might want to know given + // P and A is: + // + // * What arguments need to be added to the specialized call? + // * What parameters need to be added to the specialized callee? + // * What instructions are needed in the body of the specialized + // callee to synthesize the value that will stand in for P? + // * What information, if any, needs to be used to distinguish + // this specialized callee from others that might be generated for F? + // + // An easy case is when P is a parameter that doesn't need + // specialization. In that case: + // + // * The existing argument A should be used as an argument in + // the specialized call. + // * A clone P' of the existing parameter P should be used as a + // parameter of the specialized callee. + // * No additional instructions are needed in the body of + // the callee; the cloned parameter P' should stand in for P. + // * No information should be added to the specialization key + // based on P and A. + // + // The more interesting case is when P has a resource type, and + // A is some global shader parameter G. + // + // * No argument should be added at the new call site + // * No parameter should be added to the specialized callee + // * No additional instructions are needed in the body of + // the callee; the global G should stand in for P. + // * The global G should be used to distinguish this specialized + // callee from those that might be specialized for a different + // global shader parameter. 
+ // + // As a final example, imagine that P is still a resource type, + // but A is now an indexing operation into an array: `G[idx]`: + // + // * An argument for `idx` should be added at the call site + // * A parameter `p_idx` with the same type as `idx` should be added + // to the specialized callee. + // * An instruction should be added to the specialized callee + // to compute `G[p_idx]` and use that to stand in for P. + // * The global G should still be used to distinguish this specialized + // call site from others. + // + // That's a lot of examples, I know, but hopefully it gives a + // sense of the information we are tracking and how it differs + // across the various cases. While the example only covered one + // level of indexing, the actual implementation will handle the + // case of arbitrarily many levels of indexing, which can mean + // piping through any number of additional integer parameters + // to the callee. + + // The information we gather for a call site (before we know + // whether a specialized callee is needed) is just the new + // argument list, and the "key" information that distinguishes + // what specialized callee we want/need. + // + void gatherCallInfo( + IRCall* oldCall, + IRFunc* oldFunc, + CallSpecializationInfo& callInfo) + { + // The specialized callee key always needs to include + // the original function, since different functions + // will always yield different specializations. + // + callInfo.key.vals.Add(oldFunc); + + // The rest of the information is gathered by looking + // at parameter and argument pairs. + // + UInt oldArgCounter = 0; + for( auto oldParam : oldFunc->getParams() ) + { + UInt oldArgIndex = oldArgCounter++; + auto oldArg = oldCall->getArg(oldArgIndex); + + getCallInfoForParam(callInfo, oldParam, oldArg); + } + } + + void getCallInfoForParam( + CallSpecializationInfo& ioInfo, + IRParam* oldParam, + IRInst* oldArg) + { + // We know that the case where a parameter + // doesn't need specialization is easy. 
+ // + if( !doesParamNeedSpecialization(oldParam) ) + { + // The new call site will use the same argument + // value as the old one, and we don't need + // to add any information to distinguish the + // specialized callee based on this parameter. + // + ioInfo.newArgs.Add(oldArg); + } + else + { + // If specialization is needed, we need + // to inspect the argument value. This + // is handled with a different function + // because it needs to recurse in some cases. + // + getCallInfoForArg(ioInfo, oldArg); + } + } + + void getCallInfoForArg( + CallSpecializationInfo& ioInfo, + IRInst* oldArg) + { + // The base case we care about is when the original + // argument is a global shader parameter. + // + if( auto oldGlobalParam = as<IRGlobalParam>(oldArg) ) + { + // In this case we don't need to pass anything + // as an argument at the new call site (the + // global parameter will get specialized into + // the callee), but we *do* need to make sure + // that our key for identifying the specialized + // callee reflects that we are specializing + // to the chosen parameter. + // + ioInfo.key.vals.Add(oldGlobalParam); + } + else if( oldArg->op == kIROp_getElement ) + { + // This is the case where the `oldArg` is + // in the form `oldBase[oldIndex]` + // + auto oldBase = oldArg->getOperand(0); + auto oldIndex = oldArg->getOperand(1); + + // Effectively, we act as if `oldBase` and + // `oldIndex` were passed to the callee separately, + // so that `oldBase` is an array-of-resources and + // `oldIndex` is an ordinary integer argument. + // + // We start by recursively setting up whatever + // `oldBase` needs: + // + getCallInfoForArg(ioInfo, oldBase); + + // Then we process `oldIndex` just like we + // would have an ordinary argument that doesn't + // involve specialization: add its value to + // the arguments at the new call site, and + // don't add anything to the specialization key. 
+ // + ioInfo.newArgs.Add(oldIndex); + } + else + { + // If we fail to match any of the cases above + // then a precondition was violated in that + // `isArgSuitableForSpecialization` is allowing + // a case that this routine is not covering. + // + SLANG_UNEXPECTED("mising case in 'getCallInfoForArg'"); + } + } + + // The remaining information we've discussed is only + // gathered once we decide we want to generate a + // specialized function, but it follows much the same flow. + // + void gatherFuncInfo( + IRCall* oldCall, + IRFunc* oldFunc, + FuncSpecializationInfo& funcInfo) + { + UInt oldArgCounter = 0; + for( auto oldParam : oldFunc->getParams() ) + { + UInt oldArgIndex = oldArgCounter++; + auto oldArg = oldCall->getArg(oldArgIndex); + + // For each parameter and argument pair we will + // frame the main task as producing a value that + // will stand in for the parameter in the specialized + // function. + // + auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg); + + // We will collect the replacement value to use + // for each of the original parameters in an array. + // + funcInfo.replacementsForOldParameters.Add(newVal); + } + } + + IRInst* getSpecializedValueForParam( + FuncSpecializationInfo& ioInfo, + IRParam* oldParam, + IRInst* oldArg) + { + // As always, the easy case is when the parameter of + // the original function doesn't need specialization. + // + if( !doesParamNeedSpecialization(oldParam) ) + { + // The specialized callee will need a new parameter + // that fills the same role as the old one, so we + // create it here. + // + auto newParam = getBuilder()->createParam(oldParam->getFullType()); + ioInfo.newParams.Add(newParam); + + // The new parameter will be used as the replacement + // for the old one in the specialized function. + // + return newParam; + } + else + { + // If the parameter requires specialization, then it + // is time to look at the structure of the argument. 
+ // + return getSpecializedValueForArg(ioInfo, oldArg); + } + } + + IRInst* getSpecializedValueForArg( + FuncSpecializationInfo& ioInfo, + IRInst* oldArg) + { + // The logic here parallels `getCallInfoForArg`, + // and only differs in what information it is gathering. + // + // As before, the base case is when we have a global + // shader parameter. + // + if( auto globalParam = as<IRGlobalParam>(oldArg) ) + { + // The specialized function will not need any + // parameter in this case, and the global itself + // should be used to stand in for the original + // parameter in the specialized function. + // + return globalParam; + } + else if( oldArg->op == kIROp_getElement ) + { + // This is the case where the argument is + // in the form `oldBase[oldIndex]`. + // + auto oldBase = oldArg->getOperand(0); + auto oldIndex = oldArg->getOperand(1); + + // In `getCallInfoForArg` this case was + // handled by acting as if `oldBase` and + // `oldIndex` were being passed as two + // separate arguments. + // + // We'll follow the same structure here, + // starting by recursively processing `oldBase` + // to get a value that can stand in for it + // in the specialized callee. + // + auto newBase = getSpecializedValueForArg(ioInfo, oldBase); + + // Next we'll process `oldIndex` as if it + // was an ordinary argument (not a specialized one), + // which means creating a parameter to receive its value, + // which will also stand in for `oldIndex` in + // the body of the specialized callee. + // + auto builder = getBuilder(); + auto newIndex = builder->createParam(oldIndex->getFullType()); + ioInfo.newParams.Add(newIndex); + + // Finally, we need to compute a value that + // can stand in for `oldArg` (which was + // `oldBase[oldIndex]`) in the body of the + // specialized callee. + // + // Because we have both a `newBase` and a + // `newIndex` it is natural to construct + // `newBase[newIndex]` and use that. 
+ // + // The only complication is that we need + // to make sure that our IR builder isn't + // set to insert newly created instructions + // anywhere, since the `emit*` functions + // will try to automatically insert new + // instructions if an insertion location + // is set. + // + builder->setInsertInto(nullptr); + auto newVal = builder->emitElementExtract( + oldArg->getFullType(), + newBase, + newIndex); + + // Because our new instruction wasn't + // actually inserted anywhere, we need to + // add it to our gathered list of instructions + // that should be inserted into the body of + // the specialized callee. + // + ioInfo.newBodyInsts.Add(newVal); + + return newVal; + } + else + { + // If we don't match one of the above cases, + // then `isArgSuitableForSpecialization` is + // letting through cases that this function + // hasn't been updated to handle. + // + SLANG_UNEXPECTED("mising case in 'getSpecializedValueForArg'"); + UNREACHABLE_RETURN(nullptr); + } + } + + // Now that we've covered how all the relevant information + // gets gathered, we can turn our attention to the + // meat of actually generating a specialized version + // of a function. + // + // For the most part, this is just a matter of *cloning* + // the original function, while keeping around a mapping + // from original values/instructions to their replacements. + // + // Because we might perform specialization many times, + // it will get its own nested context type. + // + struct CloneContext + { + // When cloning, we need an IR builder to use for + // making new instructions. + // + IRBuilder* builder; + + // We also need a mapping from old instructions to their + // new equivalents, which will serve double duty: + // + // * Before we start cloning, this will be used to + // register the mapping from things that are to be + // replaced entirely (like function parameters to + // be specialized away) to their replacements (like + // a global shader parameter). 
+ // + // * During the process of cloning, this will be + // updated as we clone instructions so that when + // an instruction later in the function refers to + // something from earlier, we can look up the + // replacement. + // + Dictionary<IRInst*, IRInst*> mapOldValToNew; + + // Whenever we need to look up an operand value + // during the cloning process we'll use `cloneOperand`, + // which mostly just uses `mapOldValToNew`. + // + IRInst* cloneOperand(IRInst* oldOperand) + { + IRInst* newOperand = nullptr; + if(mapOldValToNew.TryGetValue(oldOperand, newOperand)) + return newOperand; + + // The one wrinkle here, and the place where + // this cloning logic differs from some other + // IR cloning implementations we have lying around, + // is that when we *don't* find an instruction in + // our map, we automatically assume it is not + // something that needs to be cloned, so that the old + // value is fine to use as-is. + // + // Note that this puts an ordering constraint on + // our work: if we are going to clone some instruction + // A, then we had better clone it *before* anything + // that uses A as an operand. + // + return oldOperand; + } + + // The SSA property and the way we have structured + // our "phi nodes" (block parameters) means that + // just going through the children of a function, + // and then the children of a block will generally + // do the Right Thing and always visit an instruction + // before its uses. + // + // The big exception to this is that branch instructions + // can refer to blocks later in the same function. + // + // We work around this sort of problem in a fairly + // general fashion, by splitting the cloning of + // an instruction into two steps. + // + // The first step is just to clone the instruction + // and its direct operands, but not any decorations + // or children. 
+ // + IRInst* cloneInstAndOperands(IRInst* oldInst) + { + // In order to clone an instruction we first + // need to map its operands over to their + // new values. + // + List<IRInst*> newOperands; + UInt operandCount = oldInst->getOperandCount(); + for(UInt ii = 0; ii < operandCount; ++ii) + { + auto oldOperand = oldInst->getOperand(ii); + auto newOperand = cloneOperand(oldOperand); + newOperands.Add(newOperand); + } + + // Now we can just tell the IR builder to + // go and create an instruction directly + // + // Note: this logic would not handle any instructions + // with special-case data attached, but that only + // applies to `IRConstant`s at this point, and those + // should only appear at the global scope rather than + // in function bodies. + // + SLANG_ASSERT(!as<IRConstant>(oldInst)); + auto newInst = builder->emitIntrinsicInst( + oldInst->getFullType(), + oldInst->op, + newOperands.Count(), + newOperands.Buffer()); + + return newInst; + } + + // The second phase of cloning an instruction is to clone + // its decorations and children. This step only needs to + // be performed on those instructions that *have* decorations + // and/or children. + // + // The complexity of this step comes from the fact that it + // needs to sequence the two phases of cloning for any + // child instructions. We will do this by performing the + // first phase of cloning, and building up a list of + // children that require the second phase of processing. + // Each entry in that list will be a pair of an old instruction + // and its new clone. 
+ // + struct OldNewPair + { + IRInst* oldInst; + IRInst* newInst; + }; + void cloneInstDecorationsAndChildren(IRInst* oldInst, IRInst* newInst) + { + List<OldNewPair> pairs; + for( auto oldChild : oldInst->getDecorationsAndChildren() ) + { + // As a very subtle special case, if one of the children + // of our `oldInst` already has a registered replacement, + // then we don't want to clone it (not least because + // the `Dictionary::Add` method would give us an error + // when we try to insert a new value for the same key). + // + // This arises for entries in `mapOldValToNew` that were + // seeded before cloning began (e.g., the function + // parameters that are to be replaced). + // + if(mapOldValToNew.ContainsKey(oldChild)) + continue; + + // Because we are re-using the same IR builder in + // multiple places, we need to make sure to set + // its insertion location before creating the + // child instruction. + // + builder->setInsertInto(newInst); + + // Now we can perform the first phase of cloning + // on the child, and register it in our map from + // old to new values. + // + auto newChild = cloneInstAndOperands(oldChild); + mapOldValToNew.Add(oldChild, newChild); + + // If and only if the old child had decorations + // or children, we will register it into our + // list for processing in the second phase. + // + if( oldChild->getFirstDecorationOrChild() ) + { + OldNewPair pair; + pair.oldInst = oldChild; + pair.newInst = newChild; + pairs.Add(pair); + } + } + + // Once we have done first-phase processing for + // all child instructions, we scan through those + // in the list that required second-phase processing, + // and clone their decorations and/or children recursively. 
+ // + for( auto pair : pairs ) + { + auto oldChild = pair.oldInst; + auto newChild = pair.newInst; + + cloneInstDecorationsAndChildren(oldChild, newChild); + } + } + }; + + // With all of that machinery out of the way, + // we are now prepared to walk through the process of + // specializing a given callee function based on + // the information we have gathered. + // + IRFunc* generateSpecializedFunc( + IRFunc* oldFunc, + FuncSpecializationInfo const& funcInfo) + { + // We start by setting up our context for cloning + // the blocks and instructions in the old function. + // + auto builder = getBuilder(); + CloneContext cloneContext; + cloneContext.builder = builder; + + // Next we iterate over the parameters of the old + // function, and register each as being mapped + // to its replacement in the `funcInfo` that was + // already gathered. + // + UInt paramCounter = 0; + for( auto oldParam : oldFunc->getParams() ) + { + UInt paramIndex = paramCounter++; + auto newVal = funcInfo.replacementsForOldParameters[paramIndex]; + cloneContext.mapOldValToNew.Add(oldParam, newVal); + } + + // Next we will create the skeleton of the new + // specialized function, including its type. + // + // To get the type of the new function we will + // iterate over the collected list of new + // parameters (which may differ greatly from the + // parameter list of the original) and extract + // their types. + // + List<IRType*> paramTypes; + for( auto param : funcInfo.newParams ) + { + paramTypes.Add(param->getFullType()); + } + IRType* funcType = builder->getFuncType( + paramTypes.Count(), + paramTypes.Buffer(), + oldFunc->getResultType()); + + IRFunc* newFunc = builder->createFunc(); + newFunc->setFullType(funcType); + + // The above step has accomplished the "first phase" + // of cloning the function (since `IRFunc`s have no + // operands). 
+ // + // We can now call into our `CloneContext` to perform + // the second phase of cloning, which will recursively + // clone any nested decorations, blocks, and instructions. + // + cloneContext.cloneInstDecorationsAndChildren(oldFunc, newFunc); + + // We are almost done at this point, except that `newFunc` + // is lacking its parameters, as well as any of the body + // instructions that we decided were needed during + // the information-gathering steps. + // + // We will insert these instructions into the first block + // of the function, before its first ordinary instruction. + // We know that these should exist because we had as + // a precondition that `oldFunc` was a definition (so it + // has at least one block), and in valid IR every block + // has at least one ordinary instruction (its terminator). + // + auto newEntryBlock = newFunc->getFirstBlock(); + SLANG_ASSERT(newEntryBlock); + auto newFirstOrdinary = newEntryBlock->getFirstOrdinaryInst(); + SLANG_ASSERT(newFirstOrdinary); + + // We simply iterate over the list of parameters and then + // body instructions that were produced in the information + // gathering step, and insert each before `newFirstOrdinary`, + // which has the effect of arranging them in the output + // in the order they are enumerated here. + // + for( auto newParam : funcInfo.newParams ) + { + newParam->insertBefore(newFirstOrdinary); + } + for( auto newBodyInst : funcInfo.newBodyInsts ) + { + newBodyInst->insertBefore(newFirstOrdinary); + } + + // At this point we've created a new specialized function, + // and as such it may contain call sites that were not + // covered when we built our initial work list. + // + // Before handing the specialized function back to the + // caller, we will make sure to recursively add any + // potentially-specializable call sites to our work list. 
+ // + addCallsToWorkListRec(newFunc); + + return newFunc; + } +}; + +// The top-level function for invoking the specialization pass +// is straightforward. We set up the context object +// and then defer to it for the real work. +// +void specializeResourceParameters( + CompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module) +{ + ResourceParameterSpecializationContext context; + context.compileRequest = compileRequest; + context.targetRequest = targetRequest; + context.module = module; + + context.processModule(); +} + +} // namespace Slang diff --git a/source/slang/ir-specialize-resources.h b/source/slang/ir-specialize-resources.h new file mode 100644 index 000000000..3d6ead130 --- /dev/null +++ b/source/slang/ir-specialize-resources.h @@ -0,0 +1,24 @@ +// ir-specialize-resources.h +#pragma once + +namespace Slang +{ + class CompileRequest; + class TargetRequest; + struct IRModule; + + /// Specialize calls to functions with resource-type parameters. + /// + /// For any function that has resource-type input parameters that + /// would be invalid on the chosen target, this pass will rewrite + /// any call sites that pass suitable arguments (e.g., direct + /// references to global shader parameters) to instead call + /// a specialized variant of the function that does not have + /// those resource parameters (and instead, e.g., refers to the + /// global shader parameters directly). 
+ /// + void specializeResourceParameters( + CompileRequest* compileRequest, + TargetRequest* targetRequest, + IRModule* module); +} diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp index 0d93957c8..60e983711 100644 --- a/source/slang/ir.cpp +++ b/source/slang/ir.cpp @@ -555,6 +555,23 @@ namespace Slang return entryBlock->getFirstParam(); } + IRParam* IRGlobalValueWithParams::getLastParam() + { + auto entryBlock = getFirstBlock(); + if(!entryBlock) return nullptr; + + return entryBlock->getLastParam(); + } + + IRInstList<IRParam> IRGlobalValueWithParams::getParams() + { + auto entryBlock = getFirstBlock(); + if(!entryBlock) return IRInstList<IRParam>(); + + return entryBlock->getParams(); + } + + // IRFunc IRType* IRFunc::getResultType() { return getDataType()->getResultType(); } @@ -2774,15 +2791,10 @@ namespace Slang } } - - static String getName( + static String createName( IRDumpContext* context, IRInst* value) { - String name = 0; - if (context->mapValueToName.TryGetValue(value, name)) - return name; - if(auto nameHintDecoration = value->findDecoration<IRNameHintDecoration>()) { String nameHint = nameHintDecoration->getName(); @@ -2811,6 +2823,19 @@ namespace Slang } } + static String getName( + IRDumpContext* context, + IRInst* value) + { + String name; + if (context->mapValueToName.TryGetValue(value, name)) + return name; + + name = createName(context, value); + context->mapValueToName.Add(value, name); + return name; + } + static void dumpID( IRDumpContext* context, IRInst* inst) @@ -3747,6 +3772,7 @@ namespace Slang case kIROp_GlobalGenericParam: case kIROp_WitnessTable: case kIROp_WitnessTableEntry: + case kIROp_Block: return false; case kIROp_Nop: @@ -3808,6 +3834,19 @@ namespace Slang return nullptr; } + // + // IRType + // + + IRType* unwrapArray(IRType* type) + { + IRType* t = type; + while( auto arrayType = as<IRArrayTypeBase>(t) ) + { + t = arrayType->getElementType(); + } + return t; + } // // Legalization of entry points for GLSL: @@ -4880,6 
+4919,7 @@ namespace Slang void legalizeRayTracingEntryPointParameterForGLSL( GLSLLegalizationContext* context, + IRFunc* func, IRParam* pp, VarLayout* paramLayout) { @@ -4902,6 +4942,31 @@ namespace Slang builder->addLayoutDecoration(globalParam, paramLayout); moveValueBefore(globalParam, builder->getFunc()); pp->replaceUsesWith(globalParam); + + // Because linkage between ray-tracing shaders is + // based on the type of incoming/outgoing payload + // and attribute parameters, it would be an error to + // eliminate the global parameter *even if* it is + // not actually used inside the entry point. + // + // We attach a decoration to the entry point that + // makes note of the dependency, so that steps + // like dead code elimination cannot get rid of + // the parameter. + // + // TODO: We could consider using a structure like + // this for *all* of the entry point parameters + // that get moved to the global scope, since SPIR-V + // ends up requiring such information on an `OpEntryPoint`. + // + // As a further alternative, we could decide to + // keep entry point varying input/output attached + // to the parameter list through all of the Slang IR + // steps, and only declare it as global variables at + // the last minute when emitting a GLSL `main` or + // SPIR-V for an entry point. 
+ // + builder->addDependsOnDecoration(func, globalParam); } void legalizeEntryPointParameterForGLSL( @@ -5059,7 +5124,7 @@ namespace Slang case Stage::Intersection: case Stage::Miss: case Stage::RayGeneration: - legalizeRayTracingEntryPointParameterForGLSL(context, pp, paramLayout); + legalizeRayTracingEntryPointParameterForGLSL(context, func, pp, paramLayout); return; } diff --git a/source/slang/ir.h b/source/slang/ir.h index d68b3b8f3..488611675 100644 --- a/source/slang/ir.h +++ b/source/slang/ir.h @@ -459,6 +459,8 @@ struct IRType : IRInst IR_PARENT_ISA(Type) }; +IRType* unwrapArray(IRType* type); + struct IRBasicType : IRType { BaseType getBaseType() { return BaseType(op - kIROp_FirstBasicType); } @@ -991,6 +993,8 @@ struct IRGlobalValueWithParams : IRGlobalValueWithCode // which are actually the parameters of the first // block. IRParam* getFirstParam(); + IRParam* getLastParam(); + IRInstList<IRParam> getParams(); IR_PARENT_ISA(GlobalValueWithParams) }; @@ -1010,6 +1014,8 @@ struct IRFunc : IRGlobalValueWithParams UInt getParamCount(); IRType* getParamType(UInt index); + bool isDefinition() { return getFirstBlock() != nullptr; } + IR_LEAF_ISA(Func) }; diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp index 18d42feab..74ec35fcd 100644 --- a/source/slang/lower-to-ir.cpp +++ b/source/slang/lower-to-ir.cpp @@ -5516,10 +5516,14 @@ static void lowerEntryPointToIR( } auto loweredEntryPointFunc = ensureDecl(context, entryPointFuncDecl); + // Attach a marker decoration so that we recognize + // this as an entry point. + auto builder = context->irBuilder; + builder->addEntryPointDecoration(getSimpleVal(context, loweredEntryPointFunc)); + // Now lower all the arguments supplied for global generic // type parameters. 
// - auto builder = context->irBuilder; builder->setInsertInto(builder->getModule()->getModuleInst()); for (RefPtr<Substitutions> subst = entryPointRequest->globalGenericSubst; subst; subst = subst->outer) { diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj index 427127c05..6ba32f954 100644 --- a/source/slang/slang.vcxproj +++ b/source/slang/slang.vcxproj @@ -182,6 +182,7 @@ <ClInclude Include="glsl.meta.slang.h" /> <ClInclude Include="hlsl.meta.slang.h" /> <ClInclude Include="ir-constexpr.h" /> + <ClInclude Include="ir-dce.h" /> <ClInclude Include="ir-dominators.h" /> <ClInclude Include="ir-inst-defs.h" /> <ClInclude Include="ir-insts.h" /> @@ -190,6 +191,7 @@ <ClInclude Include="ir-restructure.h" /> <ClInclude Include="ir-sccp.h" /> <ClInclude Include="ir-serialize.h" /> + <ClInclude Include="ir-specialize-resources.h" /> <ClInclude Include="ir-ssa.h" /> <ClInclude Include="ir-validate.h" /> <ClInclude Include="ir.h" /> @@ -230,6 +232,7 @@ <ClCompile Include="dxc-support.cpp" /> <ClCompile Include="emit.cpp" /> <ClCompile Include="ir-constexpr.cpp" /> + <ClCompile Include="ir-dce.cpp" /> <ClCompile Include="ir-dominators.cpp" /> <ClCompile Include="ir-legalize-types.cpp" /> <ClCompile Include="ir-missing-return.cpp" /> @@ -237,6 +240,7 @@ <ClCompile Include="ir-restructure.cpp" /> <ClCompile Include="ir-sccp.cpp" /> <ClCompile Include="ir-serialize.cpp" /> + <ClCompile Include="ir-specialize-resources.cpp" /> <ClCompile Include="ir-ssa.cpp" /> <ClCompile Include="ir-validate.cpp" /> <ClCompile Include="ir.cpp" /> @@ -300,4 +304,4 @@ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <ImportGroup Label="ExtensionTargets"> </ImportGroup> -</Project>
\ No newline at end of file +</Project>
\ No newline at end of file diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters index edd51db88..eaafa6e79 100644 --- a/source/slang/slang.vcxproj.filters +++ b/source/slang/slang.vcxproj.filters @@ -12,6 +12,9 @@ <ClInclude Include="..\..\slang.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="check.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="compiler.h"> <Filter>Header Files</Filter> </ClInclude> @@ -42,6 +45,9 @@ <ClInclude Include="ir-constexpr.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-dce.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-dominators.h"> <Filter>Header Files</Filter> </ClInclude> @@ -66,6 +72,9 @@ <ClInclude Include="ir-serialize.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="ir-specialize-resources.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="ir-ssa.h"> <Filter>Header Files</Filter> </ClInclude> @@ -162,9 +171,6 @@ <ClInclude Include="visitor.h"> <Filter>Header Files</Filter> </ClInclude> - <ClInclude Include="check.h"> - <Filter>Header Files</Filter> - </ClInclude> </ItemGroup> <ItemGroup> <ClCompile Include="check.cpp"> @@ -185,6 +191,9 @@ <ClCompile Include="ir-constexpr.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="ir-dce.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-dominators.cpp"> <Filter>Source Files</Filter> </ClCompile> @@ -206,6 +215,9 @@ <ClCompile Include="ir-serialize.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="ir-specialize-resources.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="ir-ssa.cpp"> <Filter>Source Files</Filter> </ClCompile> diff --git a/source/slang/syntax.cpp b/source/slang/syntax.cpp index d354057b2..6e57a7a57 100644 --- a/source/slang/syntax.cpp +++ b/source/slang/syntax.cpp @@ -344,7 +344,7 @@ void Type::accept(IValVisitor* 
visitor, void* extra) auto arrType = type->AsArrayType(); if (!arrType) return false; - return (ArrayLength->EqualsVal(arrType->ArrayLength) && baseType->Equals(arrType->baseType.Ptr())); + return (areValsEqual(ArrayLength, arrType->ArrayLength) && baseType->Equals(arrType->baseType.Ptr())); } RefPtr<Val> ArrayExpressionType::SubstituteImpl(SubstitutionSet subst, int* ioDiff) diff --git a/source/slang/syntax.h b/source/slang/syntax.h index 5eb40fefb..bd7de74ad 100644 --- a/source/slang/syntax.h +++ b/source/slang/syntax.h @@ -1151,6 +1151,14 @@ namespace Slang SyntaxNodeBase* createInstanceOfSyntaxClassByName( String const& name); + // `Val` + + inline bool areValsEqual(Val* left, Val* right) + { + if(!left || !right) return left == right; + return left->EqualsVal(right); + } + // inline BaseType GetVectorBaseType(VectorExpressionType* vecType) { |
