summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--source/slang/emit.cpp105
-rw-r--r--source/slang/ir-dce.cpp325
-rw-r--r--source/slang/ir-dce.h19
-rw-r--r--source/slang/ir-inst-defs.h7
-rw-r--r--source/slang/ir-insts.h23
-rw-r--r--source/slang/ir-legalize-types.cpp2
-rw-r--r--source/slang/ir-specialize-resources.cpp1087
-rw-r--r--source/slang/ir-specialize-resources.h24
-rw-r--r--source/slang/ir.cpp79
-rw-r--r--source/slang/ir.h6
-rw-r--r--source/slang/lower-to-ir.cpp6
-rw-r--r--source/slang/slang.vcxproj6
-rw-r--r--source/slang/slang.vcxproj.filters18
-rw-r--r--source/slang/syntax.cpp2
-rw-r--r--source/slang/syntax.h8
-rw-r--r--tests/compute/func-cbuffer-param.slang40
-rw-r--r--tests/compute/func-cbuffer-param.slang.expected.txt4
-rw-r--r--tests/compute/func-resource-param.slang35
-rw-r--r--tests/compute/func-resource-param.slang.expected.txt4
-rw-r--r--tests/cross-compile/func-resource-param-array.slang62
-rw-r--r--tests/cross-compile/func-resource-param-array.slang.glsl91
-rw-r--r--tests/ir/string-literal.slang.expected1
22 files changed, 1920 insertions, 34 deletions
diff --git a/source/slang/emit.cpp b/source/slang/emit.cpp
index 39616b3df..f8b14b0f3 100644
--- a/source/slang/emit.cpp
+++ b/source/slang/emit.cpp
@@ -2,9 +2,11 @@
#include "emit.h"
#include "../core/slang-writer.h"
+#include "ir-dce.h"
#include "ir-insts.h"
#include "ir-restructure.h"
#include "ir-restructure-scoping.h"
+#include "ir-specialize-resources.h"
#include "ir-ssa.h"
#include "ir-validate.h"
#include "legalize-types.h"
@@ -5603,21 +5605,56 @@ struct EmitVisitor
emit("}\n");
}
+ /// Emit the array brackets that go on the end of a declaration of the given type.
void emitArrayBrackets(
EmitContext* ctx,
- IRType* type)
+ IRType* inType)
{
SLANG_UNUSED(ctx);
- if(auto arrayType = as<IRArrayType>(type))
- {
- emit("[");
- EmitVal(arrayType->getElementCount(), kEOp_General);
- emit("]");
- }
- else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type))
+ // A declaration may require zero, one, or
+ // more array brackets. When writing out array
+ // brackets from left to right, they represent
+ // the structure of the type from the "outside"
+ // in (that is, if we have a 5-element array of
+ // 3-element arrays we should output `[5][3]`),
+ // because of C-style declarator rules.
+ //
+ // This conveniently means that we can print
+ // out all the array brackets with a looping
+ // rather than a recursive structure.
+ //
+ // We will peel the input type like an onion,
+ // looking at one layer at a time until we
+ // reach a non-array type in the middle.
+ //
+ IRType* type = inType;
+ for(;;)
{
- emit("[]");
+ if(auto arrayType = as<IRArrayType>(type))
+ {
+ emit("[");
+ EmitVal(arrayType->getElementCount(), kEOp_General);
+ emit("]");
+
+ // Continue looping on the next layer in.
+ //
+ type = arrayType->getElementType();
+ }
+ else if(auto unsizedArrayType = as<IRUnsizedArrayType>(type))
+ {
+ emit("[]");
+
+ // Continue looping on the next layer in.
+ //
+ type = unsizedArrayType->getElementType();
+ }
+ else
+ {
+ // This layer wasn't an array, so we are done.
+ //
+ return;
+ }
}
}
@@ -5752,16 +5789,6 @@ struct EmitVisitor
emit(";\n");
}
- IRType* unwrapArray(IRType* type)
- {
- IRType* t = type;
- while( auto arrayType = as<IRArrayTypeBase>(t) )
- {
- t = arrayType->getElementType();
- }
- return t;
- }
-
void emitIRStructuredBuffer_GLSL(
EmitContext* ctx,
IRGlobalParam* varDecl,
@@ -6546,6 +6573,46 @@ String emitEntryPoint(
#endif
validateIRModuleIfEnabled(compileRequest, irModule);
+ // After type legalization and subsequent SSA cleanup we expect
+ // that any resource types passed to functions are exposed
+ // as their own top-level parameters (which might have
+ // resource or array-of-...-resource types).
+ //
+ // Many of our targets place restrictions on how certain
+ // resource types can be used, so that having them as
+ // function parameters is invalid. To clean this up,
+ // we will try to specialize called functions based
+ // on the actual resources that are being passed to them
+ // at specific call sites.
+ //
+ // Because the legalization may depend on what target
+ // we are compiling for (certain things might be okay
+ // for D3D targets that are not okay for Vulkan), we
+ // pass down the target request along with the IR.
+ //
+ specializeResourceParameters(compileRequest, targetRequest, irModule);
+
+#if 0
+ dumpIRIfEnabled(compileRequest, irModule, "AFTER RESOURCE SPECIALIZATION");
+#endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+
+ // The resource-based specialization pass above
+ // may create specialized versions of functions, but
+ // it does not try to completely eliminate the original
+ // functions, so there might still be invalid code in
+ // our IR module.
+ //
+ // To clean up the code, we will apply a fairly general
+ // dead-code-elimination (DCE) pass that only retains
+ // whatever code is "live."
+ //
+ eliminateDeadCode(compileRequest, irModule);
+#if 0
+ dumpIRIfEnabled(compileRequest, irModule, "AFTER DCE");
+#endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+
// After all of the required optimization and legalization
// passes have been performed, we can emit target code from
// the IR module.
diff --git a/source/slang/ir-dce.cpp b/source/slang/ir-dce.cpp
new file mode 100644
index 000000000..0f037bfe5
--- /dev/null
+++ b/source/slang/ir-dce.cpp
@@ -0,0 +1,325 @@
+// ir-dce.cpp
+#include "ir-dce.h"
+
+#include "ir.h"
+#include "ir-insts.h"
+
+namespace Slang
+{
+
+struct DeadCodeEliminationContext
+{
+ // This type implements a simple global DCE pass over
+ // an entire module.
+ //
+ // We start with member variables to stand in for
+ // the parameters that were passed to the top-level
+ // `eliminateDeadCode` function.
+ //
+ CompileRequest* compileRequest;
+ IRModule* module;
+
+ // Our overall process is going to be to determine
+ // which instructions in the module are "live"
+ // and then eliminate anything that wasn't found to
+ // be live.
+ //
+ // We will track the liveness state by keeping
+ // a set of all instructions we have so far determined
+ // to be live.
+ //
+ HashSet<IRInst*> liveInsts;
+
+ // Querying whether an instruction has been
+ // determined to be live is easy.
+ //
+ bool isInstLive(IRInst* inst)
+ {
+ // The only wrinkle is that we want to safeguard
+ // against a null instruction (there are some
+ // corner cases where we still construct IR
+ // instructions with a null type).
+ //
+ if(!inst) return false;
+
+ return liveInsts.Contains(inst);
+ }
+
+ // We are going to do an iterative analysis
+ // where we mark instructions we know are
+ // live, and then see if that can help us
+ // identify any other instructions that
+ // must also be live.
+ //
+ // For this, we will use a work list of
+ // instructions that have been marked
+ // as live, but for which we haven't
+ // looked at their impact on other
+ // instructions.
+ //
+ List<IRInst*> workList;
+
+ // When we discover that an instruction seems
+ // to be live, we will add it to our set,
+ // and also the work list, but only if we
+ // haven't done so previously.
+ //
+ void markInstAsLive(IRInst* inst)
+ {
+ // Again, we safeguard against null instructions
+ // just in case.
+ //
+ if(!inst) return;
+
+ if(liveInsts.Contains(inst))
+ return;
+ liveInsts.Add(inst);
+ workList.Add(inst);
+ }
+
+ // Given the basic infrastructure above, let's
+ // dive into the task of actually finding all
+ // the live code in a module.
+ //
+ void processModule()
+ {
+ // First of all, we know that the root module instruction
+ // should be considered as live, because otherwise
+ // we'd end up eliminating it, so that is a
+ // good place to start.
+ //
+ markInstAsLive(module->getModuleInst());
+
+ // Marking the module as live should have
+ // seeded our work list, so we can now start
+ // processing entries off of our work list
+ // until it goes dry.
+ //
+ while( workList.Count() )
+ {
+ auto inst = workList.Last();
+ workList.RemoveLast();
+
+ // At this point we know that `inst` is live,
+ // and we want to start considering which other
+ // instructions must be live because of that
+ // knowledge.
+ //
+ // A first easy case is that the parent (if any)
+ // of a live instruction had better be live, or
+ // else we might delete the parent, and
+ // the child with it.
+ //
+ markInstAsLive(inst->getParent());
+
+ // Next the type of a live instruction, and all
+ // of its operands must also be live, or else
+ // we won't be able to compute its value.
+ //
+ markInstAsLive(inst->getFullType());
+ UInt operandCount = inst->getOperandCount();
+ for( UInt ii = 0; ii < operandCount; ++ii )
+ {
+ markInstAsLive(inst->getOperand(ii));
+ }
+
+ // Finally, we need to consider the children
+ // and decorations of the instruction.
+ //
+ // Note that just because an instruction is
+ // live doesn't mean its children must be, or
+ // else we'd never eliminate *anything* (we
+ // marked the whole module as live, and everything
+ // is a transitive child of the module).
+ //
+ // Decorations, in contrast, are always live if their
+ // parents are (because we don't want to silently drop
+ // decorations). It is still important to *mark*
+ // decorations as live, because they have operands,
+ // and those operands need to be marked as live.
+ // We will fold decorations into the same loop
+ // as children for simplicity.
+ //
+ // To keep the code here simple, we'll defer the
+ // decision of whether a child (or decoration)
+ // should be live when its parent is to a subroutine.
+ //
+ for( auto child : inst->getDecorationsAndChildren() )
+ {
+ if(shouldInstBeLiveIfParentIsLive(child))
+ {
+ // In this case, we know `inst` is live and
+ // its `child` should be live if its parent is,
+ // so the `child` must be live too.
+ //
+ markInstAsLive(child);
+ }
+ }
+ }
+
+ // If our work list runs dry, that means we've reached a steady
+ // state where everything that is transitively relevant to
+ // the "outputs" of the module has been marked as live.
+ //
+ // Now we can simply walk through all of our instructions
+ // recursively and eliminate those that are "dead" by
+ // virtue of not having been found live.
+ //
+ eliminateDeadInstsRec(module->getModuleInst());
+ }
+
+ void eliminateDeadInstsRec(IRInst* inst)
+ {
+ // Given the instruction `inst` we need to eliminate
+ // any dead code at, or under it.
+ //
+ // The easy case is if `inst` is dead (that is, not live).
+ //
+ if( !isInstLive(inst) )
+ {
+ // We can simply remove and deallocate `inst` because it is
+ // dead, and not worry about any of its descendents,
+ // because they must have been dead too (since we always
+ // mark the parent of a live instruction as live).
+ //
+ inst->removeAndDeallocate();
+ }
+ else
+ {
+ // If `inst` is live, then we need to deal with the possibility
+ // that its children/decorations (or descendents in general)
+ // might still be dead.
+ //
+ // The biggest wrinkle is that we walk the linked list of
+ // children/decorations a bit carefully, using a temporary
+ // to hold the next node, in case we eliminate one of
+ // the children as we go.
+ //
+ IRInst* next = nullptr;
+ for( IRInst* child = inst->getFirstDecorationOrChild(); child; child = next )
+ {
+ next = child->getNextInst();
+ eliminateDeadInstsRec(child);
+ }
+ }
+ }
+
+ // Now we come to the decision procedure we put off before:
+ // should a given `inst` be live if its parent is?
+ //
+ bool shouldInstBeLiveIfParentIsLive(IRInst* inst)
+ {
+ // The main source of confusion/complexity here is that
+ // we are using the same routine to decide:
+ //
+ // * Should some ordinary instruction in a basic block be kept around?
+ // * Should a basic block in some function be kept around?
+ // * Should a function/type/variable in a module be kept around?
+ //
+ // Still, there are a few basic patterns we can observe.
+ // First, if `inst` is an instruction that might have some effects
+ // when it is executed, then we should keep it around.
+ //
+ if(inst->mightHaveSideEffects())
+ return true;
+ //
+ // The `mightHaveSideEffects` query is conservative, and will
+ // return `true` as its default mode, so once we are past that
+ // query we know that `inst` is either something "structural"
+ // (that makes up the program) rather than executable, or it
+ // is executable but was on a white list of things that are
+ // safe to eliminate.
+
+ // Most top-level objects (functions, types, etc.) obviously
+ // do *not* have side effects. That creates the risk that
+ // we'll just go ahead and eliminate every single function/type
+ // in a module. There needs to be a way to identify the
+ // functions we want to keep around, and for right now
+ // that is handled with the `[entryPoint]` decoration.
+ //
+ if(inst->findDecorationImpl(kIROp_EntryPointDecoration))
+ return true;
+ //
+ // TODO: Eventually it would make sense to consider everything
+ // with an `[export(...)]` decoration as live, but our current
+ // approach to linking for back-end compilation leaves many
+ // linkage decorations in place that we seemingly don't need/want.
+
+ // A basic block is an interesting case. Knowing that a function
+ // is live means that its entry block is live, but the liveness
+ // of any other blocks is determined by whether they are referenced
+ // by other instructions (e.g., a branch from one block to
+ // another).
+ //
+ if( auto block = as<IRBlock>(inst) )
+ {
+ // To determine whether this is the first block in its
+ // parent function (or what-have-you) we can simply
+ // check if there is a previous block before it.
+ //
+ auto prevBlock = block->getPrevBlock();
+ return prevBlock == nullptr;
+ }
+
+ // There are a few special cases of "structural" instructions
+ // that we don't want to eliminate, so we'll check for those next.
+ //
+ switch( inst->op )
+ {
+ // Function parameters obviously shouldn't get eliminated,
+ // even if nothing references them, and block parameters
+ // (phi nodes) will be considered live when their block is,
+ // just so that we don't have to deal with any complications
+ // around re-writing the relevant inter-block argument passing.
+ //
+ // TODO: A smarter DCE pass could deal with this case more
+ // carefully, or we could improve the interprocedural SCCP
+ // pass to deal with block parameters instead.
+ //
+ case kIROp_Param:
+ return true;
+
+ // IR struct types and witness tables are currently kludged
+ // so that they have child instructions that represent their
+ // entries (effectively `(key,value)` pairs), and those child
+ // instructions are never directly referenced (e.g., an access
+ // to a struct field references the *key* but not the `(key,value)`
+ // pair that is the `IRField` instruction).
+ //
+ // TODO: at some point the IR should use a different representation
+ // for struct types and witness tables that does away with
+ // this problem.
+ //
+ case kIROp_StructField:
+ case kIROp_WitnessTableEntry:
+ return true;
+
+ default:
+ break;
+ }
+
+ // If none of the explicit cases above matched, then we will consider
+ // the instruction to not be live just because its parent is. Further
+ // analysis could still lead to a change in the status of `inst`, if
+ // an instruction that uses it as an operand is marked live.
+ //
+ return false;
+ }
+};
+
+// The top-level function for invoking the DCE pass
+// is straightforward. We set up the context object
+// and then defer to it for the real work.
+//
+void eliminateDeadCode(
+ CompileRequest* compileRequest,
+ IRModule* module)
+{
+ DeadCodeEliminationContext context;
+ context.compileRequest = compileRequest;
+ context.module = module;
+
+ context.processModule();
+}
+
+}
diff --git a/source/slang/ir-dce.h b/source/slang/ir-dce.h
new file mode 100644
index 000000000..fd56616d9
--- /dev/null
+++ b/source/slang/ir-dce.h
@@ -0,0 +1,19 @@
+// ir-dce.h
+#pragma once
+
+namespace Slang
+{
+ class CompileRequest;
+ struct IRModule;
+
+ /// Eliminate "dead" code from the given IR module.
+ ///
+ /// This pass is primarily designed for flow-insensitive
+ /// "global" dead code elimination (DCE), such as removing
+ /// types that are unused, functions that are never called,
+ /// etc.
+ ///
+ void eliminateDeadCode(
+ CompileRequest* compileRequest,
+ IRModule* module);
+}
diff --git a/source/slang/ir-inst-defs.h b/source/slang/ir-inst-defs.h
index 69940a79d..ee390b97b 100644
--- a/source/slang/ir-inst-defs.h
+++ b/source/slang/ir-inst-defs.h
@@ -382,6 +382,13 @@ INST(HighLevelDeclDecoration, highLevelDecl, 1, 0)
INST(GloballyCoherentDecoration, globallyCoherent, 0, 0)
INST(PatchConstantFuncDecoration, patchConstantFunc, 1, 0)
+ /// An `[entryPoint]` decoration marks a function that represents a shader entry point.
+ INST(EntryPointDecoration, entryPoint, 0, 0)
+
+ /// A `[dependsOn(x)]` decoration indicates that the parent instruction depends on `x`
+ /// even if it does not otherwise reference it.
+ INST(DependsOnDecoration, dependsOn, 1, 0)
+
/* LinkageDecoration */
INST(ImportDecoration, import, 1, 0)
INST(ExportDecoration, export, 1, 0)
diff --git a/source/slang/ir-insts.h b/source/slang/ir-insts.h
index 737675d87..26d5bcf05 100644
--- a/source/slang/ir-insts.h
+++ b/source/slang/ir-insts.h
@@ -297,7 +297,7 @@ struct IRLookupWitnessTable : IRInst
struct IRCall : IRInst
{
- IRUse func;
+ IR_LEAF_ISA(Call)
IRInst* getCallee() { return getOperand(0); }
@@ -996,6 +996,11 @@ struct IRBuilder
IRDecoration* addDecoration(IRInst* value, IROp op, IRInst* const* operands, Int operandCount);
+ IRDecoration* addDecoration(IRInst* value, IROp op)
+ {
+ return addDecoration(value, op, (IRInst* const*) nullptr, 0);
+ }
+
IRDecoration* addDecoration(IRInst* value, IROp op, IRInst* operand)
{
return addDecoration(value, op, &operand, 1);
@@ -1087,6 +1092,22 @@ struct IRBuilder
{
addDecoration(value, kIROp_ExportDecoration, getStringValue(mangledName));
}
+
+ void addEntryPointDecoration(IRInst* value)
+ {
+ addDecoration(value, kIROp_EntryPointDecoration);
+ }
+
+ /// Add a decoration that indicates that the given `inst` depends on the given `dependency`.
+ ///
+ /// This decoration can be used to ensure that a value that an instruction
+ /// implicitly depends on cannot be eliminated so long as the instruction
+ /// itself is kept alive.
+ ///
+ void addDependsOnDecoration(IRInst* inst, IRInst* dependency)
+ {
+ addDecoration(inst, kIROp_DependsOnDecoration, dependency);
+ }
};
// Helper to establish the source location that will be used
diff --git a/source/slang/ir-legalize-types.cpp b/source/slang/ir-legalize-types.cpp
index a97cc0393..ec9baed32 100644
--- a/source/slang/ir-legalize-types.cpp
+++ b/source/slang/ir-legalize-types.cpp
@@ -219,7 +219,7 @@ static LegalVal legalizeCall(
return LegalVal::simple(context->builder->emitCallInst(
callInst->getFullType(),
- callInst->func.get(),
+ callInst->getCallee(),
instArgs.Count(),
instArgs.Buffer()));
}
diff --git a/source/slang/ir-specialize-resources.cpp b/source/slang/ir-specialize-resources.cpp
new file mode 100644
index 000000000..e6d4351f2
--- /dev/null
+++ b/source/slang/ir-specialize-resources.cpp
@@ -0,0 +1,1087 @@
+// ir-specialize-resources.cpp
+#include "ir-specialize-resources.h"
+
+#include "ir.h"
+#include "ir-insts.h"
+
+namespace Slang
+{
+
+struct ResourceParameterSpecializationContext
+{
+ // This type implements a pass to specialize functions
+ // with resource parameters to ensure that they are
+ // legal for a given target.
+ //
+ // We start with member variables to stand in for
+ // the parameters that were passed to the top-level
+ // `specializeResourceParameters` function.
+ //
+ CompileRequest* compileRequest;
+ TargetRequest* targetRequest;
+ IRModule* module;
+
+ // Our general approach will be to think in terms
+ // of specializing call sites, which amount to
+ // `IRCall` instructions. We will keep a work list
+ // of call sites in the program that may be worth
+ // considering for specialization.
+ //
+ List<IRCall*> workList;
+
+ // Because we may need to generate specialized functions
+ // and generate new calls to those functions, we'll
+ // need some IR building state to get our work done.
+ //
+ SharedIRBuilder sharedBuilderStorage;
+ IRBuilder builderStorage;
+ IRBuilder* getBuilder() { return &builderStorage; }
+
+ // With the basic state out of the way, let's walk
+ // through the overall flow of the pass.
+ //
+ void processModule()
+ {
+ // We will start by initializing our IR building state.
+ //
+ sharedBuilderStorage.module = module;
+ sharedBuilderStorage.session = module->getSession();
+ builderStorage.sharedBuilder = &sharedBuilderStorage;
+
+ // Next we will populate our initial work list by
+ // recursively finding every single call site in the module.
+ //
+ addCallsToWorkListRec(module->getModuleInst());
+
+ // We will process the work list until it goes dry,
+ // treating it like a stack of work items.
+ //
+ while( workList.Count() )
+ {
+ auto call = workList.Last();
+ workList.RemoveLast();
+
+ // At each call site we first check whether it
+ // is something we can (and should) specialize,
+ // and if so, do it. The process of specializing
+ // a function may introduce new call sites that
+ // become candidates for specialization, so
+ // our work list may grow along the way.
+ //
+ if( canSpecializeCall(call) )
+ {
+ specializeCall(call);
+ }
+ }
+ }
+
+ // Setting up the work list is a simple recursive procedure.
+ //
+ void addCallsToWorkListRec(IRInst* inst)
+ {
+ // If we have a call site, then add it to the list.
+ //
+ if( auto call = as<IRCall>(inst) )
+ {
+ workList.Add(call);
+ }
+
+ // Recursively walk through any children, to
+ // see if we uncover more call sites.
+ //
+ for( auto child : inst->getChildren() )
+ {
+ addCallsToWorkListRec(child);
+ }
+ }
+
+ // We need a way to decide for a given call site
+ // whether we can/must specialize it.
+ //
+ bool canSpecializeCall(IRCall* call)
+ {
+ // We can only specialize calls where the callee
+ // func can be statically identified, and where
+ // the callee is a definition (with body) rather
+ // than a declaration. Otherwise there is no
+ // way to generate a specialized callee function.
+ //
+ auto func = as<IRFunc>(call->getCallee());
+ if(!func)
+ return false;
+ if(!func->isDefinition())
+ return false;
+
+ // With the basic checks out of the way, there are
+ // two conditions we care about:
+ //
+ // 1. Should we specialize? This amounts to whether
+ // `func` has any parameters that need specialization.
+ // We will call those "specializable" parameters for
+ // lack of a better name.
+ //
+ // 2. Can we specialize? This amounts to whether the
+ // arguments in `call` that correspond to those
+ // specializable parameters are "suitable" for use
+ // in specialization.
+ //
+ // We are going to answer both of these queries in
+ // a single loop that walks over the parameters of
+ // `func` as well as the arguments to `call`.
+ //
+ // The loop may seem a bit awkward because we are
+ // doing a parallel iteration over a linked list
+ // (the parameters of `func`) and an array (the
+ // arguments of `call`).
+ //
+ bool anySpecializableParam = false;
+ UInt argCounter = 0;
+ for( auto param : func->getParams() )
+ {
+ UInt argIndex = argCounter++;
+ SLANG_ASSERT(argIndex < call->getArgCount());
+ auto arg = call->getArg(argIndex);
+
+ // If the given parameter doesn't need specialization,
+ // then we need to keep looking.
+ //
+ if(!doesParamNeedSpecialization(param))
+ continue;
+
+ // If we have run into a `param` that needs specialization,
+ // then our first condition is met.
+ //
+ anySpecializableParam = true;
+
+ // Now we need to check whether `arg` is actually suitable
+ // for specialization (our second condition). If not, we
+ // can bail out immediately because our second condition
+ // cannot be met.
+ //
+ if(!isArgSuitableForSpecialization(arg))
+ return false;
+ }
+
+ // If we exit the loop, then the second condition must have
+ // been met (all the arguments for specializable parameters
+ // were suitable for specialization), and the result of the
+ // query comes down to the first condition.
+ //
+ return anySpecializableParam;
+ }
+
+ // Of course, now we need to back-fill the predicates that
+ // the above function used to evaluate parameters and arguments.
+
+ bool doesParamNeedSpecialization(IRParam* param)
+ {
+ // Whether or not a parameter needs specialization is really
+ // a function of its type:
+ //
+ IRType* type = param->getDataType();
+
+ // What's more, if a parameter of type `T` would need
+ // specialization, then it seems clear that a parameter
+ // of type "array of `T`" would also need specialization.
+ // We will "unwrap" any outer arrays from the parameter
+ // type before moving on, since they won't affect
+ // our decision.
+ //
+ type = unwrapArray(type);
+
+ // On all of our (current) targets, a function that
+ // takes a `ConstantBuffer<T>` parameter requires
+ // specialization. Surprisingly this includes DXIL
+ // because dxc apparently does not treat `ConstantBuffer<T>`
+ // as a first-class type.
+ //
+ if(as<IRUniformParameterGroupType>(type))
+ return true;
+
+ // For GL/Vulkan targets, we also need to specialize
+ // any parameters that use structured or byte-addressed
+ // buffers.
+ //
+ if( isKhronosTarget(targetRequest) )
+ {
+ if(as<IRHLSLStructuredBufferTypeBase>(type))
+ return true;
+ if(as<IRByteAddressBufferTypeBase>(type))
+ return true;
+ }
+
+ // For now, we will not treat any other parameters as
+ // needing specialization, even if they use resource
+ // types like `Texture2D`, because these are allowed
+ // as function parameters in both HLSL and GLSL.
+ //
+ // TODO: Eventually, if we start generating SPIR-V
+ // directly rather than through glslang, we will need
+ // to specialize *all* resource-type parameters
+ // to follow the restrictions in the spec.
+ //
+ // TODO: We may want to perform more aggressive
+ // specialization in general, especially insofar
+ // as it could simplify the task of supporting
+ // functions with resource-type outputs.
+
+ return false;
+ }
+
+ bool isArgSuitableForSpecialization(IRInst* inArg)
+ {
+ // Determining if an argument is suitable for
+ // specializing a callee function requires
+ // looking at its (recursive) structure.
+ //
+ // Rather than write a recursive procedure
+ // here, we will be tail-recursive by using
+ // a simple loop.
+ //
+ IRInst* arg = inArg;
+ for(;;)
+ {
+ // The leaf case we care about is when the
+ // argument at the call site is a global
+ // shader parameter, because then we can
+ // specialize a callee to refer to the same
+ // global parameter directly.
+ //
+ if(as<IRGlobalParam>(arg)) return true;
+
+ // As we will see later, we can also
+ // specialize a call when the argument
+ // is the result of indexing into an
+ // array (`base[index]`) *if* the `base`
+ // of the indexing operation is also
+ // suitable for specialization.
+ //
+ if( arg->op == kIROp_getElement )
+ {
+ auto base = arg->getOperand(0);
+
+ // We will "recurse" on the base of
+ // the indexing operation by continuing
+ // our loop with the `base` as our new
+ // argument.
+ //
+ arg = base;
+ continue;
+ }
+
+ // By default, we will *not* consider an argument
+ // suitable for specialization.
+ //
+ // TODO: There may be other cases that are worth
+ // handling here. The current code is based on
+ // observation of what simple shaders do in
+ // practice.
+ //
+ return false;
+ }
+ }
+
+ // Once we've determined that a given call site can/should
+ // be specialized, we need to perform the actual specialization.
+ // This is where things are going to get more involved.
+ //
+ // There are a few different concerns we need to deal with
+ // that mean we end up having two different passes that walk
+ // over the parameters/arguments of the call (in addition to
+ // the ones we had above for determining if we can/should
+ // specialize in the first place).
+ //
+ // The first of the two passes determines information
+ // relevant to the call site, comprising both the arguments
+ // that will be passed to the specialized function as
+ // well as a "key" to identify the specialized function
+ // that is required.
+ //
+ // The key type is similar to that used for generic specialization
+ // elsewhere in the IR code. It might be worth pulling this
+ // notion out somewhere more centralized, but we are dealing
+ // with the code duplication for now.
+ //
+ struct Key
+ {
+ // The structure of a specialization key will be a list
+ // of instructions starting with the function to be specialized,
+ // and then having one or more entries for each parameter
+ // that is being specialized to indicate the value to which
+ // it is being specialized (e.g. the global shader parameter).
+ //
+ List<IRInst*> vals;
+
+ // In order to use this type as a `Dictionary` key we
+ // need it to support equality and hashing, but the
+ // implementations are straightforward.
+ //
+ // TODO: honestly we might consider having `GetHashCode`
+ // and `operator==` defined for `List<T>`.
+
+ bool operator==(Key const& other) const
+ {
+ auto valCount = vals.Count();
+ if(valCount != other.vals.Count()) return false;
+ for( UInt ii = 0; ii < valCount; ++ii )
+ {
+ if(vals[ii] != other.vals[ii]) return false;
+ }
+ return true;
+ }
+
+ int GetHashCode() const
+ {
+ auto valCount = vals.Count();
+ int hash = Slang::GetHashCode(valCount);
+ for( UInt ii = 0; ii < valCount; ++ii )
+ {
+ hash = combineHash(hash, Slang::GetHashCode(vals[ii]));
+ }
+ return hash;
+ }
+ };
+
+ // As indicated above, the information we collect about a call
+ // site consists of the key for the specialized function we
+ // will call, and a list of the arguments that will be passed
+ // to the call.
+ //
+ struct CallSpecializationInfo
+ {
+ Key key;
+ List<IRInst*> newArgs;
+ };
+
+ // Once we've collected the information about a call site
+ // we can use a dictionary to see if we already created
+ // a specialized version of the callee that matches its
+ // requirements.
+ //
+ Dictionary<Key, IRFunc*> specializedFuncs;
+
+ // If the dictionary didn't have a specialized function
+ // suitable for a call site, we need a second information-gathering
+ // pass to decide what the new parameters of the specialized
+ // functions should be, and what instructions the new function
+ // must execute in its body to set up the replacements for the
+ // old parameters.
+ //
+ struct FuncSpecializationInfo
+ {
+ List<IRParam*> newParams;
+ List<IRInst*> newBodyInsts;
+ List<IRInst*> replacementsForOldParameters;
+ };
+
+ // Before diving into how the different passes collect
+ // their information, we will dive into the main
+ // specialization logic first.
+ //
+ void specializeCall(IRCall* oldCall)
+ {
+ // We have an existing call site `oldCall` that
+ // we know can and should be specialized.
+ //
+ // That means the callee should be a known function
+ // definition, or else `canSpecializeCall` didn't
+ // correctly check the preconditions.
+ //
+ auto oldFunc = as<IRFunc>(oldCall->getCallee());
+ SLANG_ASSERT(oldFunc);
+ SLANG_ASSERT(oldFunc->isDefinition());
+
+ // Our first information-gathering pass will
+ // compute the key for the specialized function
+ // we want to call, and the arguments we will
+ // use for that call.
+ //
+ CallSpecializationInfo callInfo;
+ gatherCallInfo(oldCall, oldFunc, callInfo);
+
+ // Once we have gathered information on the call,
+ // we can check if we have an existing specialization
+ // that we generated before (for another call site)
+ // that is suitable to this call site.
+ //
+ IRFunc* newFunc = nullptr;
+ if( !specializedFuncs.TryGetValue(callInfo.key, newFunc) )
+ {
+ // If we didn't find a pre-existing specialized
+ // function, then we will go ahead and create one.
+ //
+ // We start by gathering the information from the call
+ // site that is relevant to generating a specialized
+ // callee function, which we avoided doing earlier
+ // because it might have been throwaway work.
+ //
+ FuncSpecializationInfo funcInfo;
+ gatherFuncInfo(oldCall, oldFunc, funcInfo);
+
+ // Now we use the gathered information to generate
+ // a new callee function based on the original
+ // function and the information we gathered.
+ //
+ newFunc = generateSpecializedFunc(oldFunc, funcInfo);
+ specializedFuncs.Add(callInfo.key, newFunc);
+ }
+
+ // Once we've other found or generated a specialized function
+ // we need to generate a call to it, and then use the new
+ // call as a replacement for the old one.
+ //
+ auto newCall = getBuilder()->emitCallInst(
+ oldCall->getFullType(),
+ newFunc,
+ callInfo.newArgs.Count(),
+ callInfo.newArgs.Buffer());
+
+ newCall->insertBefore(oldCall);
+ oldCall->replaceUsesWith(newCall);
+ oldCall->removeAndDeallocate();
+ }
+
+ // Before diving into the details on how we gather information
+ // and specialize callees, let's stop to think about what we'd
+ // like to do in terms of individual parameters and arguments.
+ //
+ // Suppose we are specializing both a call site C and the callee
+ // function F, and we are considering a particular pair of
+ // a parameter P of F, and an argument A at the call site.
+ //
+ // The full extent of information we might want to know given
+ // P and A is:
+ //
+ // * What arguments need to be added to the specialized call?
+ // * What parameters need to be added to the specialized callee?
+ // * What instructions are needed in the body of the specialized
+ // callee to synthesize the value that will stand in for P?
+ // * What information, if any, needs to be used to distinguish
+ // this specialized callee from others that might be generated for F?
+ //
+ // An easy case is when P is a parameter that doesn't need
+ // specialization. In that case:
+ //
+ // * The existing argument A should be used as an argument in
+ // the specialized call.
+ // * A clone P' of the existing parameter P should be used as a
+ // parameter of the specialized callee.
+ // * No additional instructions are needed in the body of
+ // the callee; the cloned parameter P' should stand in for P.
+ // * No information should be added to the specialization key
+ // based on P and A.
+ //
+ // The more interesting case is when P has a resource type, and
+ // A is some global shader parameter G.
+ //
+ // * No argument should be added at the new call site
+ // * No parameter should be added to the specialized callee
+ // * No additional instructions are needed in the body of
+ // the callee; the global G should stand in for P.
+ // * The global G should be used to distinguish this specialized
+ // callee from those that might be specialized for a different
+ // global shader parameter.
+ //
+ // As a final example, imagine that P is still a resource type,
+ // but A is now an indexing operation into an array: `G[idx]`:
+ //
+ // * An argument for `idx` should be added at the call site
+ // * A parameter `p_idx` with the same type as `idx` should be added
+ // to the specialized callee.
+ // * An instruction should be added to the specialized callee
+ // to compute `G[p_idx]` and use that to stand in for P.
+ // * The global G should still be used to distinguish this specialized
+ // call site from others.
+ //
+ // That's a lot of examples, I know, but hopefully it gives a
+ // sense of the information we are tracking and how it differs
+ // across the various cases. While the example only covered one
+ // level of indexing, the actual implementation will handle the
+ // case of arbitrarily many levels of indexing, which can mean
+ // piping through any number of additional integer parameters
+ // to the callee.
+
+ // The information we gather for a call site (before we know
+ // whether a specialized callee is needed) is just the new
+ // argument list, and the "key" information that distinguishes
+ // what specialized callee we want/need.
+ //
+ void gatherCallInfo(
+ IRCall* oldCall,
+ IRFunc* oldFunc,
+ CallSpecializationInfo& callInfo)
+ {
+ // The specialized callee key always needs to include
+ // the original function, since different functions
+ // will always yield different specializations.
+ //
+ callInfo.key.vals.Add(oldFunc);
+
+ // The rest of the information is gathered by looking
+ // at parameter and argument pairs, matched up by position.
+ //
+ UInt oldArgCounter = 0;
+ for( auto oldParam : oldFunc->getParams() )
+ {
+ UInt oldArgIndex = oldArgCounter++;
+ auto oldArg = oldCall->getArg(oldArgIndex);
+
+ getCallInfoForParam(callInfo, oldParam, oldArg);
+ }
+ }
+
+ void getCallInfoForParam(
+ CallSpecializationInfo& ioInfo,
+ IRParam* oldParam,
+ IRInst* oldArg)
+ {
+ // We know that the case where a parameter
+ // doesn't need specialization is easy.
+ //
+ if( !doesParamNeedSpecialization(oldParam) )
+ {
+ // The new call site will use the same argument
+ // value as the old one, and we don't need
+ // to add any information to distinguish the
+ // specialized callee based on this parameter.
+ //
+ ioInfo.newArgs.Add(oldArg);
+ }
+ else
+ {
+ // If specialization is needed, we need
+ // to inspect the argument value. This
+ // is handled with a different function
+ // because it needs to recurse in some cases.
+ //
+ getCallInfoForArg(ioInfo, oldArg);
+ }
+ }
+
+ void getCallInfoForArg(
+ CallSpecializationInfo& ioInfo,
+ IRInst* oldArg)
+ {
+ // The base case we care about is when the original
+ // argument is a global shader parameter.
+ //
+ if( auto oldGlobalParam = as<IRGlobalParam>(oldArg) )
+ {
+ // In this case we don't need to pass anything
+ // as an argument at the new call site (the
+ // global parameter will get specialized into
+ // the callee), but we *do* need to make sure
+ // that our key for identifying the specialized
+ // callee reflects that we are specializing
+ // to the chosen parameter.
+ //
+ ioInfo.key.vals.Add(oldGlobalParam);
+ }
+ else if( oldArg->op == kIROp_getElement )
+ {
+ // This is the case where the `oldArg` is
+ // in the form `oldBase[oldIndex]`
+ //
+ auto oldBase = oldArg->getOperand(0);
+ auto oldIndex = oldArg->getOperand(1);
+
+ // Effectively, we act as if `oldBase` and
+ // `oldIndex` were passed to the callee separately,
+ // so that `oldBase` is an array-of-resources and
+ // `oldIndex` is an ordinary integer argument.
+ //
+ // We start by recursively setting up whatever
+ // `oldBase` needs:
+ //
+ getCallInfoForArg(ioInfo, oldBase);
+
+ // Then we process `oldIndex` just like we
+ // would have an ordinary argument that doesn't
+ // involve specialization: add its value to
+ // the arguments at the new call site, and
+ // don't add anything to the specialization key.
+ //
+ ioInfo.newArgs.Add(oldIndex);
+ }
+ else
+ {
+ // If we fail to match any of the cases above
+ // then a precondition was violated in that
+ // `isArgSuitableForSpecialization` is allowing
+ // a case that this routine is not covering.
+ //
+ SLANG_UNEXPECTED("mising case in 'getCallInfoForArg'");
+ }
+ }
+
+ // The remaining information we've discussed is only
+ // gathered once we decide we want to generate a
+ // specialized function, but it follows much the same flow.
+ //
+ void gatherFuncInfo(
+ IRCall* oldCall,
+ IRFunc* oldFunc,
+ FuncSpecializationInfo& funcInfo)
+ {
+ UInt oldArgCounter = 0;
+ for( auto oldParam : oldFunc->getParams() )
+ {
+ UInt oldArgIndex = oldArgCounter++;
+ auto oldArg = oldCall->getArg(oldArgIndex);
+
+ // For each parameter and argument pair we will
+ // frame the main task as producing a value that
+ // will stand in for the parameter in the specialized
+ // function.
+ //
+ auto newVal = getSpecializedValueForParam(funcInfo, oldParam, oldArg);
+
+ // We will collect the replacement value to use for each
+ // of the original parameters in an array, in parameter order.
+ //
+ funcInfo.replacementsForOldParameters.Add(newVal);
+ }
+ }
+
+ IRInst* getSpecializedValueForParam(
+ FuncSpecializationInfo& ioInfo,
+ IRParam* oldParam,
+ IRInst* oldArg)
+ {
+ // As always, the easy case is when the parameter of
+ // the original function doesn't need specialization.
+ //
+ if( !doesParamNeedSpecialization(oldParam) )
+ {
+ // The specialized callee will need a new parameter
+ // (of the same type) that fills the same role as the
+ // old one, so we create it here.
+ //
+ auto newParam = getBuilder()->createParam(oldParam->getFullType());
+ ioInfo.newParams.Add(newParam);
+
+ // The new parameter will be used as the replacement
+ // for the old one in the specialized function.
+ //
+ return newParam;
+ }
+ else
+ {
+ // If the parameter requires specialization, then it
+ // is time to look at the structure of the argument.
+ //
+ return getSpecializedValueForArg(ioInfo, oldArg);
+ }
+ }
+
+ IRInst* getSpecializedValueForArg(
+ FuncSpecializationInfo& ioInfo,
+ IRInst* oldArg)
+ {
+ // The logic here parallels `getCallInfoForArg`,
+ // and only differs in what information it is gathering.
+ //
+ // As before, the base case is when we have a global
+ // shader parameter.
+ //
+ if( auto globalParam = as<IRGlobalParam>(oldArg) )
+ {
+ // The specialized function will not need any
+ // parameter in this case, and the global itself
+ // should be used to stand in for the original
+ // parameter in the specialized function.
+ //
+ return globalParam;
+ }
+ else if( oldArg->op == kIROp_getElement )
+ {
+ // This is the case where the argument is
+ // in the form `oldBase[oldIndex]`.
+ //
+ auto oldBase = oldArg->getOperand(0);
+ auto oldIndex = oldArg->getOperand(1);
+
+ // In `getCallInfoForArg` this case was
+ // handled by acting as if `oldBase` and
+ // `oldIndex` were being passed as two
+ // separate arguments.
+ //
+ // We'll follow the same structure here,
+ // starting by recursively processing `oldBase`
+ // to get a value that can stand in for it
+ // in the specialized callee.
+ //
+ auto newBase = getSpecializedValueForArg(ioInfo, oldBase);
+
+ // Next we'll process `oldIndex` as if it
+ // was an ordinary argument (not a specialized one),
+ // which means creating a parameter to receive its value,
+ // which will also stand in for `oldIndex` in
+ // the body of the specialized callee.
+ //
+ auto builder = getBuilder();
+ auto newIndex = builder->createParam(oldIndex->getFullType());
+ ioInfo.newParams.Add(newIndex);
+
+ // Finally, we need to compute a value that
+ // can stand in for `oldArg` (which was
+ // `oldBase[oldIndex]`) in the body of the
+ // specialized callee.
+ //
+ // Because we have both a `newBase` and a
+ // `newIndex` it is natural to construct
+ // `newBase[newIndex]` and use that.
+ //
+ // The only complication is that we need
+ // to make sure that our IR builder isn't
+ // set to insert newly created instructions
+ // anywhere, since the `emit*` functions
+ // will try to automatically insert new
+ // instructions if an insertion location
+ // is set.
+ //
+ builder->setInsertInto(nullptr);
+ auto newVal = builder->emitElementExtract(
+ oldArg->getFullType(),
+ newBase,
+ newIndex);
+
+ // Because our new instruction wasn't
+ // actually inserted anywhere, we need to
+ // add it to our gathered list of instructions
+ // that should be inserted into the body of
+ // the specialized callee.
+ //
+ ioInfo.newBodyInsts.Add(newVal);
+
+ return newVal;
+ }
+ else
+ {
+ // If we don't match one of the above cases,
+ // then `isArgSuitableForSpecialization` is
+ // letting through cases that this function
+ // hasn't been updated to handle.
+ //
+ SLANG_UNEXPECTED("mising case in 'getSpecializedValueForArg'");
+ UNREACHABLE_RETURN(nullptr);
+ }
+ }
+
+ // Now that we've covered how all the relevant information
+ // gets gathered, we can turn our attention to the
+ // meat of actually generating a specialized version
+ // of a function.
+ //
+ // For the most part, this is just a matter of *cloning*
+ // the original function, while keeping around a mapping
+ // from original values/instructions to their replacements.
+ //
+ // Because we might perform specialization many times,
+ // it will get its own nested context type.
+ //
+ struct CloneContext
+ {
+ // When cloning, we need an IR builder to use for
+ // making new instructions.
+ //
+ IRBuilder* builder;
+
+ // We also need a mapping from old instruction to their
+ // new equivalents, which will serve double duty:
+ //
+ // * Before we start cloning, this will be used to
+ // register the mapping from things that are to be
+ // replaced entirely (like function parameters to
+ // be specialized away) to their replacements (like
+ // a global shader parameter).
+ //
+ // * During the process of cloning, this will be
+ // updated as we clone instructions so that when
+ // an instruction later in the function refers to
+ // something from earlier, we can look up the
+ // replacement.
+ //
+ Dictionary<IRInst*, IRInst*> mapOldValToNew;
+
+ // Whenever we need to look up an operand value
+ // during the cloning process we'll use `cloneOperand`,
+ // which mostly just uses `mapOldValToNew`.
+ //
+ IRInst* cloneOperand(IRInst* oldOperand)
+ {
+ IRInst* newOperand = nullptr;
+ if(mapOldValToNew.TryGetValue(oldOperand, newOperand))
+ return newOperand;
+
+ // The one wrinkle here, and the place where
+ // this cloning logic differs from some other
+ // IR cloning implementations we have lying around,
+ // is that when we *don't* find an instruction in
+ // our map, we automatically assume it is not
+ // something that needs to be cloned, so that the old
+ // value is fine to use as-is.
+ //
+ // Note that this puts an ordering constraint on
+ // our work: if we are going to clone some instruction
+ // A, then we had better clone it *before* anything
+ // that uses A as an operand.
+ //
+ return oldOperand;
+ }
+
+ // The SSA property and the way we have structured
+ // our "phi nodes" (block parameters) means that
+ // just going through the children of a function,
+ // and then the children of a block will generally
+ // do the Right Thing and always visit an instruction
+ // before its uses.
+ //
+ // The big exception to this is that branch instructions
+ // can refer to blocks later in the same function.
+ //
+ // We work around this sort of problem in a fairly
+ // general fashion, by splitting the cloning of
+ // an instruction into two steps.
+ //
+ // The first step is just to clone the instruction
+ // and its direct operands, but not any decorations
+ // or children.
+ //
+ IRInst* cloneInstAndOperands(IRInst* oldInst)
+ {
+ // In order to clone an instruction we first
+ // need to map its operands over to their
+ // new values.
+ //
+ List<IRInst*> newOperands;
+ UInt operandCount = oldInst->getOperandCount();
+ for(UInt ii = 0; ii < operandCount; ++ii)
+ {
+ auto oldOperand = oldInst->getOperand(ii);
+ auto newOperand = cloneOperand(oldOperand);
+ newOperands.Add(newOperand);
+ }
+
+ // Now we can just tell the IR builder to
+ // go and create an instruction directly
+ //
+ // Note: this logic would not handle any instructions
+ // with special-case data attached, but that only
+ // applies to `IRConstant`s at this point, and those
+ // should only appear at the global scope rather than
+ // in function bodies.
+ //
+ SLANG_ASSERT(!as<IRConstant>(oldInst));
+ auto newInst = builder->emitIntrinsicInst(
+ oldInst->getFullType(),
+ oldInst->op,
+ newOperands.Count(),
+ newOperands.Buffer());
+
+ return newInst;
+ }
+
+ // The second phase of cloning an instruction is to clone
+ // its decorations and children. This step only needs to
+ // be performed on those instructions that *have* decorations
+ // and/or children.
+ //
+ // The complexity of this step comes from the fact that it
+ // needs to sequence the two phases of cloning for any
+ // child instructions. We will do this by performing the
+ // first phase of cloning, and building up a list of
+ // children that require the second phase of processing.
+ // Each entry in that list will be a pair of an old instruction
+ // and its new clone.
+ //
+ struct OldNewPair
+ {
+ IRInst* oldInst;
+ IRInst* newInst;
+ };
+ void cloneInstDecorationsAndChildren(IRInst* oldInst, IRInst* newInst)
+ {
+ List<OldNewPair> pairs;
+ for( auto oldChild : oldInst->getDecorationsAndChildren() )
+ {
+ // As a very subtle special case, if one of the children
+ // of our `oldInst` already has a registered replacement,
+ // then we don't want to clone it (not least because
+ // the `Dictionary::Add` method would give us an error
+ // when we try to insert a new value for the same key).
+ //
+ // This arises for entries in `mapOldValToNew` that were
+ // seeded before cloning began (e.g., the function
+ // parameters that are to be replaced).
+ //
+ if(mapOldValToNew.ContainsKey(oldChild))
+ continue;
+
+ // Because we are re-using the same IR builder in
+ // multiple places, we need to make sure to set
+ // its insertion location before creating the
+ // child instruction.
+ //
+ builder->setInsertInto(newInst);
+
+ // Now we can perform the first phase of cloning
+ // on the child, and register it in our map from
+ // old to new values.
+ //
+ auto newChild = cloneInstAndOperands(oldChild);
+ mapOldValToNew.Add(oldChild, newChild);
+
+ // If and only if the old child had decorations
+ // or children, we will register it into our
+ // list for processing in the second phase.
+ //
+ if( oldChild->getFirstDecorationOrChild() )
+ {
+ OldNewPair pair;
+ pair.oldInst = oldChild;
+ pair.newInst = newChild;
+ pairs.Add(pair);
+ }
+ }
+
+ // Once we have done first-phase processing for
+ // all child instructions, we scan through those
+ // in the list that required second-phase processing,
+ // and clone their decorations and/or children recursively.
+ //
+ for( auto pair : pairs )
+ {
+ auto oldChild = pair.oldInst;
+ auto newChild = pair.newInst;
+
+ cloneInstDecorationsAndChildren(oldChild, newChild);
+ }
+ }
+ };
+
+ // With all of that machinery out of the way,
+ // we are now prepared to walk through the process of
+ // specializing a given callee function based on
+ // the information we have gathered.
+ //
+ IRFunc* generateSpecializedFunc(
+ IRFunc* oldFunc,
+ FuncSpecializationInfo const& funcInfo)
+ {
+ // We start by setting up our context for cloning
+ // the blocks and instructions in the old function.
+ //
+ auto builder = getBuilder();
+ CloneContext cloneContext;
+ cloneContext.builder = builder;
+
+ // Next we iterate over the parameters of the old
+ // function, and register each as being mapped
+ // to its replacement in the `funcInfo` that was
+ // already gathered.
+ //
+ UInt paramCounter = 0;
+ for( auto oldParam : oldFunc->getParams() )
+ {
+ UInt paramIndex = paramCounter++;
+ auto newVal = funcInfo.replacementsForOldParameters[paramIndex];
+ cloneContext.mapOldValToNew.Add(oldParam, newVal);
+ }
+
+ // Next we will create the skeleton of the new
+ // specialized function, including its type.
+ //
+ // To get the type of the new function we will
+ // iterate over the collected list of new
+ // parameters (which may differ greatly from the
+ // parameter list of the original) and extract
+ // their types.
+ //
+ List<IRType*> paramTypes;
+ for( auto param : funcInfo.newParams )
+ {
+ paramTypes.Add(param->getFullType());
+ }
+ IRType* funcType = builder->getFuncType(
+ paramTypes.Count(),
+ paramTypes.Buffer(),
+ oldFunc->getResultType());
+
+ IRFunc* newFunc = builder->createFunc();
+ newFunc->setFullType(funcType);
+
+ // The above step has accomplished the "first phase"
+ // of cloning the function (since `IRFunc`s have no
+ // operands).
+ //
+ // We can now call into our `CloneContext` to perform
+ // the second phase of cloning, which will recursively
+ // clone any nested decorations, blocks, and instructions.
+ //
+ cloneContext.cloneInstDecorationsAndChildren(oldFunc, newFunc);
+
+ // We are almost done at this point, except that `newFunc`
+ // is lacking its parameters, as well as any of the body
+ // instructions that we decided were needed during
+ // the information-gathering steps.
+ //
+ // We will insert these instructions into the first block
+ // of the function, before its first ordinary instruction.
+ // We know that these should exist because we had as
+ // a precondition that `oldFunc` was a definition (so it
+ // has at least one block), and in valid IR every block
+ // has at least one ordinary instruction (its terminator).
+ //
+ auto newEntryBlock = newFunc->getFirstBlock();
+ SLANG_ASSERT(newEntryBlock);
+ auto newFirstOrdinary = newEntryBlock->getFirstOrdinaryInst();
+ SLANG_ASSERT(newFirstOrdinary);
+
+ // We simply iterate over the list of parameters and then
+ // body instructions that were produced in the information
+ // gathering step, and insert each before `newFirstOrdinary`,
+ // which has the effect of arranging them in the output
+ // in the order they are enumerated here.
+ //
+ for( auto newParam : funcInfo.newParams )
+ {
+ newParam->insertBefore(newFirstOrdinary);
+ }
+ for( auto newBodyInst : funcInfo.newBodyInsts )
+ {
+ newBodyInst->insertBefore(newFirstOrdinary);
+ }
+
+ // At this point we've created a new specialized function,
+ // and as such it may contain call sites that were not
+ // covered when we built our initial work list.
+ //
+ // Before handing the specialized function back to the
+ // caller, we will make sure to recursively add any
+ // potentially-specializable call sites to our work list.
+ //
+ addCallsToWorkListRec(newFunc);
+
+ return newFunc;
+ }
+};
+
+// The top-level function for invoking the specialization pass
+// is straightforward. We set up the context object
+// and then defer to it for the real work.
+//
+void specializeResourceParameters(
+ CompileRequest* compileRequest,
+ TargetRequest* targetRequest,
+ IRModule* module)
+{
+ ResourceParameterSpecializationContext context;
+ context.compileRequest = compileRequest;
+ context.targetRequest = targetRequest;
+ context.module = module;
+
+ context.processModule();
+}
+
+} // namespace Slang
diff --git a/source/slang/ir-specialize-resources.h b/source/slang/ir-specialize-resources.h
new file mode 100644
index 000000000..3d6ead130
--- /dev/null
+++ b/source/slang/ir-specialize-resources.h
@@ -0,0 +1,24 @@
+// ir-specialize-resources.h
+#pragma once
+
+namespace Slang
+{
+ class CompileRequest;
+ class TargetRequest;
+ struct IRModule;
+
+ /// Specialize calls to functions with resource-type parameters.
+ ///
+ /// For any function that has resource-type input parameters that
+ /// would be invalid on the chosen target, this pass will rewrite
+ /// any call sites that pass suitable arguments (e.g., direct
+ /// references to global shader parameters) to instead call
+ /// a specialized variant of the function that does not have
+ /// those resource parameters (and instead, e.g., refers to the
+ /// global shader parameters directly).
+ ///
+ void specializeResourceParameters(
+ CompileRequest* compileRequest,
+ TargetRequest* targetRequest,
+ IRModule* module);
+}
diff --git a/source/slang/ir.cpp b/source/slang/ir.cpp
index 0d93957c8..60e983711 100644
--- a/source/slang/ir.cpp
+++ b/source/slang/ir.cpp
@@ -555,6 +555,23 @@ namespace Slang
return entryBlock->getFirstParam();
}
+ IRParam* IRGlobalValueWithParams::getLastParam()
+ {
+ auto entryBlock = getFirstBlock();
+ if(!entryBlock) return nullptr; // no first block, so there is no parameter to return
+
+ return entryBlock->getLastParam(); // the parameters are those of the first block
+ }
+
+ IRInstList<IRParam> IRGlobalValueWithParams::getParams()
+ {
+ auto entryBlock = getFirstBlock();
+ if(!entryBlock) return IRInstList<IRParam>(); // no first block: hand back a default-constructed list
+
+ return entryBlock->getParams(); // the parameters are those of the first block
+ }
+
+
// IRFunc
IRType* IRFunc::getResultType() { return getDataType()->getResultType(); }
@@ -2774,15 +2791,10 @@ namespace Slang
}
}
-
- static String getName(
+ static String createName(
IRDumpContext* context,
IRInst* value)
{
- String name = 0;
- if (context->mapValueToName.TryGetValue(value, name))
- return name;
-
if(auto nameHintDecoration = value->findDecoration<IRNameHintDecoration>())
{
String nameHint = nameHintDecoration->getName();
@@ -2811,6 +2823,19 @@ namespace Slang
}
}
+ static String getName(
+ IRDumpContext* context,
+ IRInst* value)
+ {
+ String name;
+ if (context->mapValueToName.TryGetValue(value, name))
+ return name; // reuse the name already recorded for this value
+
+ name = createName(context, value);
+ context->mapValueToName.Add(value, name); // record it so later lookups return the same name
+ return name;
+ }
+
static void dumpID(
IRDumpContext* context,
IRInst* inst)
@@ -3747,6 +3772,7 @@ namespace Slang
case kIROp_GlobalGenericParam:
case kIROp_WitnessTable:
case kIROp_WitnessTableEntry:
+ case kIROp_Block:
return false;
case kIROp_Nop:
@@ -3808,6 +3834,19 @@ namespace Slang
return nullptr;
}
+ //
+ // IRType
+ //
+
+ IRType* unwrapArray(IRType* type)
+ {
+ IRType* t = type;
+ while( auto arrayType = as<IRArrayTypeBase>(t) ) // peel off array layers, however many are nested
+ {
+ t = arrayType->getElementType();
+ }
+ return t; // the first type reached that is not an `IRArrayTypeBase`
+ }
//
// Legalization of entry points for GLSL:
@@ -4880,6 +4919,7 @@ namespace Slang
void legalizeRayTracingEntryPointParameterForGLSL(
GLSLLegalizationContext* context,
+ IRFunc* func,
IRParam* pp,
VarLayout* paramLayout)
{
@@ -4902,6 +4942,31 @@ namespace Slang
builder->addLayoutDecoration(globalParam, paramLayout);
moveValueBefore(globalParam, builder->getFunc());
pp->replaceUsesWith(globalParam);
+
+ // Because linkage between ray-tracing shaders is
+ // based on the type of incoming/outgoing payload
+ // and attribute parameters, it would be an error to
+ // eliminate the global parameter *even if* it is
+ // not actually used inside the entry point.
+ //
+ // We attach a decoration to the entry point that
+ // makes note of the dependency, so that steps
+ // like dead code elimination cannot get rid of
+ // the parameter.
+ //
+ // TODO: We could consider using a structure like
+ // this for *all* of the entry point parameters
+ // that get moved to the global scope, since SPIR-V
+ // ends up requiring such information on an `OpEntryPoint`.
+ //
+ // As a further alternative, we could decide to
+ // keep entry point varying input/output attached
+ // to the parameter list through all of the Slang IR
+ // steps, and only declare it as global variables at
+ // the last minute when emitting a GLSL `main` or
+ // SPIR-V for an entry point.
+ //
+ builder->addDependsOnDecoration(func, globalParam);
}
void legalizeEntryPointParameterForGLSL(
@@ -5059,7 +5124,7 @@ namespace Slang
case Stage::Intersection:
case Stage::Miss:
case Stage::RayGeneration:
- legalizeRayTracingEntryPointParameterForGLSL(context, pp, paramLayout);
+ legalizeRayTracingEntryPointParameterForGLSL(context, func, pp, paramLayout);
return;
}
diff --git a/source/slang/ir.h b/source/slang/ir.h
index d68b3b8f3..488611675 100644
--- a/source/slang/ir.h
+++ b/source/slang/ir.h
@@ -459,6 +459,8 @@ struct IRType : IRInst
IR_PARENT_ISA(Type)
};
+IRType* unwrapArray(IRType* type);
+
struct IRBasicType : IRType
{
BaseType getBaseType() { return BaseType(op - kIROp_FirstBasicType); }
@@ -991,6 +993,8 @@ struct IRGlobalValueWithParams : IRGlobalValueWithCode
// which are actually the parameters of the first
// block.
IRParam* getFirstParam();
+ IRParam* getLastParam();
+ IRInstList<IRParam> getParams();
IR_PARENT_ISA(GlobalValueWithParams)
};
@@ -1010,6 +1014,8 @@ struct IRFunc : IRGlobalValueWithParams
UInt getParamCount();
IRType* getParamType(UInt index);
+ bool isDefinition() { return getFirstBlock() != nullptr; } // a function counts as a definition once it has a first block
+
IR_LEAF_ISA(Func)
};
diff --git a/source/slang/lower-to-ir.cpp b/source/slang/lower-to-ir.cpp
index 18d42feab..74ec35fcd 100644
--- a/source/slang/lower-to-ir.cpp
+++ b/source/slang/lower-to-ir.cpp
@@ -5516,10 +5516,14 @@ static void lowerEntryPointToIR(
}
auto loweredEntryPointFunc = ensureDecl(context, entryPointFuncDecl);
+ // Attach a marker decoration so that we recognize
+ // this as an entry point.
+ auto builder = context->irBuilder;
+ builder->addEntryPointDecoration(getSimpleVal(context, loweredEntryPointFunc));
+
// Now lower all the arguments supplied for global generic
// type parameters.
//
- auto builder = context->irBuilder;
builder->setInsertInto(builder->getModule()->getModuleInst());
for (RefPtr<Substitutions> subst = entryPointRequest->globalGenericSubst; subst; subst = subst->outer)
{
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index 427127c05..6ba32f954 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -182,6 +182,7 @@
<ClInclude Include="glsl.meta.slang.h" />
<ClInclude Include="hlsl.meta.slang.h" />
<ClInclude Include="ir-constexpr.h" />
+ <ClInclude Include="ir-dce.h" />
<ClInclude Include="ir-dominators.h" />
<ClInclude Include="ir-inst-defs.h" />
<ClInclude Include="ir-insts.h" />
@@ -190,6 +191,7 @@
<ClInclude Include="ir-restructure.h" />
<ClInclude Include="ir-sccp.h" />
<ClInclude Include="ir-serialize.h" />
+ <ClInclude Include="ir-specialize-resources.h" />
<ClInclude Include="ir-ssa.h" />
<ClInclude Include="ir-validate.h" />
<ClInclude Include="ir.h" />
@@ -230,6 +232,7 @@
<ClCompile Include="dxc-support.cpp" />
<ClCompile Include="emit.cpp" />
<ClCompile Include="ir-constexpr.cpp" />
+ <ClCompile Include="ir-dce.cpp" />
<ClCompile Include="ir-dominators.cpp" />
<ClCompile Include="ir-legalize-types.cpp" />
<ClCompile Include="ir-missing-return.cpp" />
@@ -237,6 +240,7 @@
<ClCompile Include="ir-restructure.cpp" />
<ClCompile Include="ir-sccp.cpp" />
<ClCompile Include="ir-serialize.cpp" />
+ <ClCompile Include="ir-specialize-resources.cpp" />
<ClCompile Include="ir-ssa.cpp" />
<ClCompile Include="ir-validate.cpp" />
<ClCompile Include="ir.cpp" />
@@ -300,4 +304,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project> \ No newline at end of file
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index edd51db88..eaafa6e79 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -12,6 +12,9 @@
<ClInclude Include="..\..\slang.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="check.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="compiler.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -42,6 +45,9 @@
<ClInclude Include="ir-constexpr.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="ir-dce.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="ir-dominators.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -66,6 +72,9 @@
<ClInclude Include="ir-serialize.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="ir-specialize-resources.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="ir-ssa.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -162,9 +171,6 @@
<ClInclude Include="visitor.h">
<Filter>Header Files</Filter>
</ClInclude>
- <ClInclude Include="check.h">
- <Filter>Header Files</Filter>
- </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="check.cpp">
@@ -185,6 +191,9 @@
<ClCompile Include="ir-constexpr.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="ir-dce.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="ir-dominators.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@@ -206,6 +215,9 @@
<ClCompile Include="ir-serialize.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="ir-specialize-resources.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="ir-ssa.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/slang/syntax.cpp b/source/slang/syntax.cpp
index d354057b2..6e57a7a57 100644
--- a/source/slang/syntax.cpp
+++ b/source/slang/syntax.cpp
@@ -344,7 +344,7 @@ void Type::accept(IValVisitor* visitor, void* extra)
auto arrType = type->AsArrayType();
if (!arrType)
return false;
- return (ArrayLength->EqualsVal(arrType->ArrayLength) && baseType->Equals(arrType->baseType.Ptr()));
+ return (areValsEqual(ArrayLength, arrType->ArrayLength) && baseType->Equals(arrType->baseType.Ptr()));
}
RefPtr<Val> ArrayExpressionType::SubstituteImpl(SubstitutionSet subst, int* ioDiff)
diff --git a/source/slang/syntax.h b/source/slang/syntax.h
index 5eb40fefb..bd7de74ad 100644
--- a/source/slang/syntax.h
+++ b/source/slang/syntax.h
@@ -1151,6 +1151,14 @@ namespace Slang
SyntaxNodeBase* createInstanceOfSyntaxClassByName(
String const& name);
+ // `Val` helpers: null-tolerant equality comparison of two `Val` pointers
+
+ inline bool areValsEqual(Val* left, Val* right)
+ {
+ if(!left || !right) return left == right; // either side null: equal only when both are null
+ return left->EqualsVal(right); // both non-null: defer to `EqualsVal`
+ }
+
//
inline BaseType GetVectorBaseType(VectorExpressionType* vecType) {
diff --git a/tests/compute/func-cbuffer-param.slang b/tests/compute/func-cbuffer-param.slang
new file mode 100644
index 000000000..5730272ab
--- /dev/null
+++ b/tests/compute/func-cbuffer-param.slang
@@ -0,0 +1,40 @@
+// func-cbuffer-param.slang
+
+// Test that passing a `ConstantBuffer<X>` parameter
+// into a function works across all targets.
+
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-dx12 -compute -use-dxil
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
+
+struct Data
+{
+ int4 val[4];
+}
+
+//TEST_INPUT:cbuffer(data=[0 1 2 3 16 17 18 19 32 33 34 35 48 49 50 51]):dxbinding(0),glbinding(0)
+ConstantBuffer<Data> a;
+
+//TEST_INPUT:cbuffer(data=[16 17 18 19 32 33 34 35 48 49 50 51 64 65 66 67]):dxbinding(1),glbinding(1)
+ConstantBuffer<Data> b;
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(2),out
+RWStructuredBuffer<int> outputBuffer;
+
+int helper(ConstantBuffer<Data> buffer, int index)
+{
+ return buffer.val[index].x;
+}
+
+int test(int val)
+{
+ return val + helper(a, val) + helper(b, val);
+}
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ int inVal = (int) dispatchThreadID.x;
+ int outVal = test(inVal);
+ outputBuffer[dispatchThreadID.x] = outVal;
+} \ No newline at end of file
diff --git a/tests/compute/func-cbuffer-param.slang.expected.txt b/tests/compute/func-cbuffer-param.slang.expected.txt
new file mode 100644
index 000000000..35000ff87
--- /dev/null
+++ b/tests/compute/func-cbuffer-param.slang.expected.txt
@@ -0,0 +1,4 @@
+10
+31
+52
+73
diff --git a/tests/compute/func-resource-param.slang b/tests/compute/func-resource-param.slang
new file mode 100644
index 000000000..19784b108
--- /dev/null
+++ b/tests/compute/func-resource-param.slang
@@ -0,0 +1,35 @@
+// func-resource-param.slang
+
+// Test that a function with a resource parameter that
+// requires non-trivial legalization can be compiled
+// to work on GLSL-based targets.
+
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-dx12 -compute
+//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
+
+//NO_TEST:SIMPLE:-target glsl -entry computeMain -stage compute -validate-ir -dump-ir
+
+//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):dxbinding(0),glbinding(0),out
+RWStructuredBuffer<int> outputBuffer;
+
+//TEST_INPUT:ubuffer(data=[0 16 32 48], stride=4):dxbinding(1),glbinding(1)
+RWStructuredBuffer<int> inputBuffer;
+
+int helper(RWStructuredBuffer<int> buffer, int index)
+{
+ return buffer[index];
+}
+
+int test(int val)
+{
+ return helper(inputBuffer, val) + val;
+}
+
+[numthreads(4, 1, 1)]
+void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
+{
+ int inVal = (int) dispatchThreadID.x;
+ int outVal = test(inVal);
+ outputBuffer[dispatchThreadID.x] = outVal;
+} \ No newline at end of file
diff --git a/tests/compute/func-resource-param.slang.expected.txt b/tests/compute/func-resource-param.slang.expected.txt
new file mode 100644
index 000000000..d4cb1cc00
--- /dev/null
+++ b/tests/compute/func-resource-param.slang.expected.txt
@@ -0,0 +1,4 @@
+0
+11
+22
+33
diff --git a/tests/cross-compile/func-resource-param-array.slang b/tests/cross-compile/func-resource-param-array.slang
new file mode 100644
index 000000000..7062169dc
--- /dev/null
+++ b/tests/cross-compile/func-resource-param-array.slang
@@ -0,0 +1,62 @@
+// func-resource-param-array.slang
+
+//TEST:CROSS_COMPILE:-target spirv-assembly -entry main -stage compute
+
+// Test that we generate expected code for scenarios involving
+// resource-type function parameters, even when working with
+// arrays of resources.
+
+int f(RWStructuredBuffer<int> fx, uint fi) { return fx[fi] ; }
+
+// TODO: Note that we are declaring the function
+// parameter here with an explicitly-sized array
+// because Slang currently doesn't support conversion
+// from a sized to an unsized array type.
+//
+int g(RWStructuredBuffer<int> gx[3], uint gi, uint gj) { return gx[gi][gj]; }
+
+RWStructuredBuffer<int> a;
+RWStructuredBuffer<int> b[3];
+
+// Note: Slang currently generates an array-of-arrays in the output
+// for this declaration, which glslang complains leads to invalid
+// SPIR-V. This means that there is yet another legalization step
+// that Slang should perform on this declaration.
+//
+// For now we are fine with generating invalid SPIR-V, because
+// we are not going to execute the output of this test case.
+//
+RWStructuredBuffer<int> c[4][3];
+
+void main(uint3 tid : SV_DispatchThreadID)
+{
+ uint ii = tid.x;
+ uint jj = tid.y;
+ uint kk = tid.z;
+
+ // Can we specialize `f`?
+ //
+ int tmp = f(a, ii);
+
+ // If we ask for the same specialization again, do
+ // we avoid code duplication?
+ //
+ tmp += f(a, jj);
+
+ // If we pass in a reference to an array element,
+ // can we still specialize?
+ //
+ tmp += f(b[ii], jj);
+
+ // If we have a function that takes an *array* can
+ // we specialize?
+ //
+ tmp += g(b, ii, jj);
+
+ // What if the function takes an array, and we pass
+ // in an element of an array-of-arrays?
+ //
+ tmp += g(c[ii], jj, kk);
+
+ a[ii] = tmp;
+}
diff --git a/tests/cross-compile/func-resource-param-array.slang.glsl b/tests/cross-compile/func-resource-param-array.slang.glsl
new file mode 100644
index 000000000..6224ccd1c
--- /dev/null
+++ b/tests/cross-compile/func-resource-param-array.slang.glsl
@@ -0,0 +1,91 @@
+// func-resource-param-array.slang.glsl
+#version 450
+
+#define a a_0
+#define b b_0
+#define c c_0
+#define ii ii_0
+#define jj jj_0
+#define kk kk_0
+
+#define f_a f_0
+#define f_b f_1
+#define g_b g_0
+#define g_c g_1
+
+#define a_block _S1
+#define b_block _S2
+#define c_block _S3
+
+#define f_a_i _S4
+#define f_b_t _S5
+#define f_b_i _S6
+#define g_b_i _S7
+#define g_b_j _S8
+#define g_c_t _S9
+#define g_c_i _S10
+#define g_c_j _S11
+#define tmp_f_a_ii _S12
+#define tmp_f_a_jj _S13
+#define tmp_f_b _S14
+#define tmp_g_b _S15
+#define tmp_g_c _S16
+
+layout(std430, binding = 0) buffer a_block {
+ int _data[];
+} a;
+
+layout(std430, binding = 1) buffer b_block {
+ int _data[];
+} b[3];
+
+layout(std430, binding = 2) buffer c_block {
+ int _data[];
+} c[4][3];
+
+int f_a(uint f_a_i)
+{
+ return a._data[f_a_i];
+}
+
+int f_b(uint f_b_t, uint f_b_i)
+{
+ return b[f_b_t]._data[f_b_i];
+}
+
+int g_b(uint g_b_i, uint g_b_j)
+{
+ return b[g_b_i]._data[g_b_j];
+}
+
+int g_c(uint g_c_t, uint g_c_i, uint g_c_j)
+{
+ return c[g_c_t][g_c_i]._data[g_c_j];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+void main()
+{
+ uint ii = gl_GlobalInvocationID.x;
+ uint jj = gl_GlobalInvocationID.y;
+ uint kk = gl_GlobalInvocationID.z;
+
+ int tmp_f_a_ii = f_a(ii);
+
+ int tmp_f_a_jj = f_a(jj);
+ int tmp_0 = tmp_f_a_ii + tmp_f_a_jj;
+
+ int tmp_f_b = f_b(ii, jj);
+ int tmp_1 = tmp_0 + tmp_f_b;
+
+ int tmp_g_b = g_b(ii, jj);
+ int tmp_2 = tmp_1 + tmp_g_b;
+
+ int tmp_g_c = g_c(ii, jj, kk);
+ int tmp_3 = tmp_2 + tmp_g_c;
+
+ a._data[ii] = tmp_3;
+
+ return;
+}
diff --git a/tests/ir/string-literal.slang.expected b/tests/ir/string-literal.slang.expected
index 7bc66d682..b86eab2c8 100644
--- a/tests/ir/string-literal.slang.expected
+++ b/tests/ir/string-literal.slang.expected
@@ -1,5 +1,6 @@
result code = 0
standard error = {
+[entryPoint]
[export("_S04mainp1puV")]
[nameHint("main")]
func %main : Func(Void, UInt)