summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--source/slang/slang-emit-c-like.cpp20
-rw-r--r--source/slang/slang-emit-cpp.cpp168
-rw-r--r--source/slang/slang-emit-cpp.h7
-rw-r--r--source/slang/slang-emit-cuda.cpp234
-rw-r--r--source/slang/slang-emit-cuda.h2
-rw-r--r--source/slang/slang-emit.cpp62
-rw-r--r--source/slang/slang-ir-entry-point-raw-ptr-params.cpp121
-rw-r--r--source/slang/slang-ir-entry-point-raw-ptr-params.h12
-rw-r--r--source/slang/slang-ir-entry-point-uniforms.cpp382
-rw-r--r--source/slang/slang-ir-entry-point-uniforms.h10
-rw-r--r--source/slang/slang-ir-explicit-global-context.cpp523
-rw-r--r--source/slang/slang-ir-explicit-global-context.h15
-rw-r--r--source/slang/slang-ir-explicit-global-init.cpp207
-rw-r--r--source/slang/slang-ir-explicit-global-init.h11
-rw-r--r--source/slang/slang-ir-insts.h6
-rw-r--r--source/slang/slang-ir-legalize-varying-params.cpp3
-rw-r--r--source/slang/slang-type-layout.cpp57
-rw-r--r--source/slang/slang.vcxproj8
-rw-r--r--source/slang/slang.vcxproj.filters18
19 files changed, 1329 insertions, 537 deletions
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 582f5e445..733811183 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -2341,16 +2341,20 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO
case kIROp_BitCast:
{
- // TODO: we can simplify the logic for arbitrary bitcasts
- // by always bitcasting the source to a `uint*` type (if it
- // isn't already) and then bitcasting that to the destination
- // type (if it isn't already `uint*`.
+ // Note: we are currently emitting casts as plain old
+ // C-style casts, which may not always perform a bitcast.
//
- // For now we are assuming the source type is *already*
- // a `uint*` type of the appropriate size.
- //
- // auto fromType = extractBaseType(inst->getOperand(0)->getDataType());
+ // TODO: This operation should map to an intrinsic to be
+ // provided in a prelude for C/C++, so that the target
+ // can easily emit code for whatever the best possible
+ // bitcast is on the platform.
+ auto prec = getInfo(EmitOp::Prefix);
+ needClose = maybeEmitParens(outerPrec, prec);
+
+ m_writer->emit("(");
+ emitType(inst->getDataType());
+ m_writer->emit(")");
m_writer->emit("(");
emitOperand(inst->getOperand(0), getInfo(EmitOp::General));
m_writer->emit(")");
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index b59611b38..b71feafc1 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -1767,7 +1767,6 @@ void CPPSourceEmitter::_emitWitnessTableDefinitions()
else
isFirstEntry = false;
- m_writer->emit("&KernelContext::");
m_writer->emit(_getWitnessTableWrapperFuncName(funcVal));
}
else if (auto witnessTableVal = as<IRWitnessTable>(entry->getSatisfyingVal()))
@@ -1830,7 +1829,7 @@ void CPPSourceEmitter::_maybeEmitWitnessTableTypeDefinition(
if (auto funcVal = as<IRFuncType>(entry->getRequirementVal()))
{
emitType(funcVal->getResultType());
- m_writer->emit(" (KernelContext::*");
+ m_writer->emit(" (*");
m_writer->emit(getName(entry->getRequirementKey()));
m_writer->emit(")");
m_writer->emit("(");
@@ -1964,8 +1963,7 @@ void CPPSourceEmitter::emitSimpleFuncImpl(IRFunc* func)
// on CPU/CUDA, and these all bottleneck through the actual `IRFunc`
// here as a workhorse.
//
- // Because the workhorse function is currently emitted as a member of
- // `KernelContext`, and doesn't have the right signature to service
+ // Because the workhorse function doesn't have the right signature to service
// general-purpose calls, it is being emitted with a `_` prefix.
//
StringBuilder prefixName;
@@ -2288,15 +2286,6 @@ bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOut
// Does this function declare any requirements.
handleCallExprDecorationsImpl(funcValue);
- if (funcValue->op == kIROp_lookup_interface_method)
- {
- m_writer->emit("(this->*(");
- emitOperand(funcValue, EmitOpInfo());
- m_writer->emit("))");
- _emitCallArgList(as<IRCall>(inst));
- return true;
- }
-
// try doing automatically
return _tryEmitInstExprAsIntrinsic(inst, inOuterPrec);
}
@@ -2389,8 +2378,6 @@ void CPPSourceEmitter::emitPreprocessorDirectivesImpl()
m_writer->emit("#ifdef SLANG_PRELUDE_NAMESPACE\n");
m_writer->emit("using namespace SLANG_PRELUDE_NAMESPACE;\n");
m_writer->emit("#endif\n\n");
-
- m_writer->emit("struct KernelContext;\n\n");
}
if (m_target == CodeGenTarget::CSource)
@@ -2470,7 +2457,7 @@ static bool _isFunction(IROp op)
return op == kIROp_Func;
}
-void CPPSourceEmitter::_emitEntryPointDefinitionStart(IRFunc* func, IRGlobalParam* entryPointParams, IRGlobalParam* globalParams, const String& funcName, const UnownedStringSlice& varyingTypeName)
+void CPPSourceEmitter::_emitEntryPointDefinitionStart(IRFunc* func, const String& funcName, const UnownedStringSlice& varyingTypeName)
{
auto resultType = func->getResultType();
@@ -2488,31 +2475,6 @@ void CPPSourceEmitter::_emitEntryPointDefinitionStart(IRFunc* func, IRGlobalPara
m_writer->emit("\n{\n");
m_writer->indent();
- // Initialize when constructing so that globals are zeroed
- m_writer->emit("KernelContext context = {};\n");
-
- if (entryPointParams)
- {
- auto param = entryPointParams;
- auto paramType = param->getDataType();
-
- m_writer->emit("context.");
- m_writer->emit(getName(param));
- m_writer->emit(" = (");
- emitType(paramType);
- m_writer->emit(")entryPointParams; \n");
- }
- if (globalParams)
- {
- auto param = globalParams;
- auto paramType = param->getDataType();
-
- m_writer->emit("context.");
- m_writer->emit(getName(param));
- m_writer->emit(" = (");
- emitType(paramType);
- m_writer->emit(")globalParams; \n");
- }
}
void CPPSourceEmitter::_emitEntryPointDefinitionEnd(IRFunc* func)
@@ -2577,9 +2539,9 @@ void CPPSourceEmitter::_emitEntryPointGroup(const Int sizeAlongAxis[kThreadGroup
}
// just call at inner loop point
- m_writer->emit("context._");
+ m_writer->emit("_");
m_writer->emit(funcName);
- m_writer->emit("(&threadInput);\n");
+ m_writer->emit("(&threadInput, entryPointParams, globalParams);\n");
// Close all the loops
for (Index i = Index(axes.getCount() - 1); i >= 0; --i)
@@ -2675,97 +2637,6 @@ void CPPSourceEmitter::_emitForwardDeclarations(const List<EmitAction>& actions)
}
}
-static bool isVaryingResourceKind(LayoutResourceKind kind)
-{
- switch(kind)
- {
- default:
- return false;
-
- case LayoutResourceKind::VaryingInput:
- case LayoutResourceKind::VaryingOutput:
- return true;
- }
-}
-
-static bool isVaryingParameter(IRTypeLayout* typeLayout)
-{
- for(auto sizeAttr : typeLayout->getSizeAttrs())
- {
- if(!isVaryingResourceKind(sizeAttr->getResourceKind()))
- return false;
- }
- return true;
-}
-
-static bool isVaryingParameter(IRVarLayout* varLayout)
-{
- return isVaryingParameter(varLayout->getTypeLayout());
-}
-
-void CPPSourceEmitter::_findShaderParams(
- IRGlobalParam** outEntryPointParam,
- IRGlobalParam** outGlobalParam)
-{
- SLANG_ASSERT(outEntryPointParam);
- SLANG_ASSERT(outGlobalParam);
-
- IRGlobalParam*& entryPointParam = *outEntryPointParam;
- IRGlobalParam*& globalParam = *outGlobalParam;
-
- for(auto inst : m_irModule->getGlobalInsts())
- {
- auto param = as<IRGlobalParam>(inst);
- if(!param)
- continue;
-
- if(auto layoutDecor = param->findDecoration<IRLayoutDecoration>())
- {
- if(auto varLayout = as<IRVarLayout>(layoutDecor->getLayout()))
- {
- if(isVaryingParameter(varLayout))
- continue;
- auto typeLayout = varLayout->getTypeLayout();
- if(typeLayout->findSizeAttr(LayoutResourceKind::VaryingInput))
- continue;
- if(typeLayout->findSizeAttr(LayoutResourceKind::VaryingOutput))
- continue;
- }
- }
-
- // Currently, the entry-point parameters
- // are represented as a single parameter
- // at the global scope, and the same is
- // true of the parameters that were
- // originally declared as globals.
- //
- // We need to find capture each of these
- // parameters, and we need to tell them
- // apart. Luckily, the logic that
- // moved the entry-point parameters to
- // global scope will ahve also marked
- // the entry-point parameters with
- // a decoration that we can detect.
- //
- if (inst->findDecorationImpl(kIROp_EntryPointParamDecoration))
- {
- // Should only be one instruction marked this way
- SLANG_ASSERT(entryPointParam == nullptr);
- entryPointParam = param;
- continue;
- }
- else
- {
- // There should only be one instruction representing
- // the global-scope shader parameters.
- //
- SLANG_ASSERT(globalParam == nullptr);
- globalParam = param;
- continue;
- }
- }
-}
-
void CPPSourceEmitter::emitModuleImpl(IRModule* module)
{
// Setup all built in types used in the module
@@ -2778,24 +2649,8 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
_emitForwardDeclarations(actions);
- IRGlobalParam* entryPointParams = nullptr;
- IRGlobalParam* globalParams = nullptr;
- _findShaderParams(&entryPointParams, &globalParams);
- // Output the 'Context' which will be used for execution
{
- m_writer->emit("struct KernelContext\n{\n");
- m_writer->indent();
-
- if (globalParams)
- {
- emitGlobalInst(globalParams);
- }
- if (entryPointParams)
- {
- emitGlobalInst(entryPointParams);
- }
-
// Output all the thread locals
for (auto action : actions)
{
@@ -2818,9 +2673,6 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
// These wrapper functions takes an abstract type parameter (void*)
// in the place of `this` parameter.
_emitWitnessTableWrappers();
-
- m_writer->dedent();
- m_writer->emit("};\n\n");
}
// Emit all witness table definitions.
@@ -2856,11 +2708,11 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
String threadFuncName = builder;
- _emitEntryPointDefinitionStart(func, entryPointParams, globalParams, threadFuncName, UnownedStringSlice::fromLiteral("ComputeThreadVaryingInput"));
+ _emitEntryPointDefinitionStart(func, threadFuncName, UnownedStringSlice::fromLiteral("ComputeThreadVaryingInput"));
- m_writer->emit("context._");
+ m_writer->emit("_");
m_writer->emit(funcName);
- m_writer->emit("(varyingInput);\n");
+ m_writer->emit("(varyingInput, entryPointParams, globalParams);\n");
_emitEntryPointDefinitionEnd(func);
}
@@ -2873,7 +2725,7 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
String groupFuncName = builder;
- _emitEntryPointDefinitionStart(func, entryPointParams, globalParams, groupFuncName, UnownedStringSlice::fromLiteral("ComputeVaryingInput"));
+ _emitEntryPointDefinitionStart(func, groupFuncName, UnownedStringSlice::fromLiteral("ComputeVaryingInput"));
m_writer->emit("ComputeThreadVaryingInput threadInput = {};\n");
m_writer->emit("threadInput.groupID = varyingInput->startGroupID;\n");
@@ -2884,7 +2736,7 @@ void CPPSourceEmitter::emitModuleImpl(IRModule* module)
// Emit the main version - which takes a dispatch size
{
- _emitEntryPointDefinitionStart(func, entryPointParams, globalParams, funcName, UnownedStringSlice::fromLiteral("ComputeVaryingInput"));
+ _emitEntryPointDefinitionStart(func, funcName, UnownedStringSlice::fromLiteral("ComputeVaryingInput"));
m_writer->emit("ComputeVaryingInput vi = *varyingInput;\n");
m_writer->emit("ComputeVaryingInput groupVaryingInput = {};\n");
diff --git a/source/slang/slang-emit-cpp.h b/source/slang/slang-emit-cpp.h
index 13f99c19b..29d6e215e 100644
--- a/source/slang/slang-emit-cpp.h
+++ b/source/slang/slang-emit-cpp.h
@@ -84,11 +84,6 @@ protected:
void _emitForwardDeclarations(const List<EmitAction>& actions);
- /// Find the IR global parameters representing the entry-point and global shader parameters (if any)
- void _findShaderParams(
- IRGlobalParam** outEntryPointParam,
- IRGlobalParam** outGlobalParam);
-
void _emitAryDefinition(const HLSLIntrinsic* specOp);
// Really we don't want any of these defined like they are here, they should be defined in slang stdlib
@@ -119,7 +114,7 @@ protected:
SlangResult _calcCPPTextureTypeName(IRTextureTypeBase* texType, StringBuilder& outName);
- void _emitEntryPointDefinitionStart(IRFunc* func, IRGlobalParam* entryPointParams, IRGlobalParam* globalParams, const String& funcName, const UnownedStringSlice& varyingTypeName);
+ void _emitEntryPointDefinitionStart(IRFunc* func, const String& funcName, const UnownedStringSlice& varyingTypeName);
void _emitEntryPointDefinitionEnd(IRFunc* func);
void _emitEntryPointGroup(const Int sizeAlongAxis[kThreadGroupAxisCount], const String& funcName);
void _emitEntryPointGroupRange(const Int sizeAlongAxis[kThreadGroupAxisCount], const String& funcName);
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index c7dee9f9d..6f24d5b74 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -248,6 +248,19 @@ void CUDASourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPoin
SLANG_UNUSED(entryPointDecor);
}
+void CUDASourceEmitter::emitFunctionPreambleImpl(IRInst* inst)
+{
+ if(inst && inst->findDecoration<IREntryPointDecoration>())
+ {
+ m_writer->emit("extern \"C\" __global__ ");
+ }
+ else
+ {
+ m_writer->emit("__device__ ");
+ }
+}
+
+
void CUDASourceEmitter::emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec)
{
switch (specOp->op)
@@ -661,10 +674,6 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
_emitForwardDeclarations(actions);
- IRGlobalParam* entryPointParams = nullptr;
- IRGlobalParam* globalParams = nullptr;
- _findShaderParams(&entryPointParams, &globalParams);
-
// Output group shared variables
{
@@ -677,20 +686,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
}
}
- // Output the 'Context' which will be used for execution
{
- m_writer->emit("struct KernelContext\n{\n");
- m_writer->indent();
-
- if (globalParams)
- {
- emitGlobalInst(globalParams);
- }
- if (entryPointParams)
- {
- emitGlobalInst(entryPointParams);
- }
-
// Output all the thread locals
for (auto action : actions)
{
@@ -708,211 +704,7 @@ void CUDASourceEmitter::emitModuleImpl(IRModule* module)
emitGlobalInst(action.inst);
}
}
-
- m_writer->dedent();
- m_writer->emit("};\n\n");
}
-
- // Finally we need to output dll entry points
-
- for (auto action : actions)
- {
- if (action.level == EmitAction::Level::Definition && as<IRFunc>(action.inst))
- {
- IRFunc* func = as<IRFunc>(action.inst);
-
- IREntryPointDecoration* entryPointDecor = func->findDecoration<IREntryPointDecoration>();
-
- if (entryPointDecor)
- {
- // We have an entry-point function in the IR module, which we
- // will want to emit as a `__global__` function in the generated
- // CUDA C++.
- //
- // The most common case will be a compute kernel, in which case
- // we will emit the function more or less as-is, including
- // usingits original name as the name of the global symbol.
- //
- String funcName = getName(func);
- String globalSymbolName = funcName;
-
- // We also suport emitting ray tracing kernels for use with
- // OptiX, and in that case the name of the global symbol
- // must be prefixed to indicate to the OptiX runtime what
- // stage it is to be compiled for.
- //
- auto stage = entryPointDecor->getProfile().getStage();
- switch( stage )
- {
- default:
- break;
-
- #define CASE(STAGE, PREFIX) \
- case Stage::STAGE: globalSymbolName = #PREFIX + funcName; break
-
- CASE(RayGeneration, __raygen__);
- // TODO: Add the other ray tracing shader stages here.
- #undef CASE
- }
-
- if(globalParams && stage != Stage::Compute )
- {
- // Non-compute shaders (currently just OptiX ray tracing kernels)
- // require parameter data that is shared across multiple kernels
- // (which in our case is the global-scope shader parameters)
- // to be passed using a global `__constant__` variable.
- //
- // The use of `"C"` linkage here is required because the name
- // of this symbol must be passed to the OptiX API when creating
- // a pipeline that uses this compiled module. The exact name
- // used here (`SLANG_globalParams`) is thus a part of the
- // binary interface for Slang->OptiX translation.
- //
- // TODO: We need to make a decision about how indirected
- // the parameter passing for global-scope data is going to
- // be for CUDA and OptiX (ideally with an answer that is
- // consistent across the two). For now we are deciding to
- // make this global `__constant__` variable represent the
- // global parameter data directly, rather than indirectly.
- //
- auto globalParamsPtrType = as<IRPointerLikeType>(globalParams->getDataType());
- SLANG_ASSERT(globalParamsPtrType);
- auto gloablParamsElementType = globalParamsPtrType->getElementType();
- //
- m_writer->emit("extern \"C\" { __constant__ ");
- emitType(gloablParamsElementType, "SLANG_globalParams");
- m_writer->emit("; }\n");
- }
-
- // As a convenience for anybody reading the generated
- // CUDA C++ code, we will prefix a compute kernel
- // with the information from the `[numthreads(...)]`
- // attribute in the source.
- //
- if(stage == Stage::Compute)
- {
- Int sizeAlongAxis[kThreadGroupAxisCount];
- getComputeThreadGroupSize(func, sizeAlongAxis);
-
- //
- m_writer->emit("// [numthreads(");
- for (int ii = 0; ii < kThreadGroupAxisCount; ++ii)
- {
- if (ii != 0) m_writer->emit(", ");
- m_writer->emit(sizeAlongAxis[ii]);
- }
- m_writer->emit(")]\n");
- }
-
- m_writer->emit("extern \"C\" __global__ ");
-
- auto resultType = func->getResultType();
-
- // Emit the actual function
- emitEntryPointAttributes(func, entryPointDecor);
- emitType(resultType, globalSymbolName);
-
- if( stage == Stage::Compute )
- {
- // CUDA compute shaders take all of their parameters explicitly as
- // part of the entry-point parameter list. This means that the
- // data representing Slang shader parameters at both the global
- // and entry-point scopes needs to be passed as parameters.
- //
- // At the binary level, our generated CUDA compute kernels will take
- // two pointer parameters: the first points to the per-entry-point
- // `uniform` parameter data, and the second points to the global-scope
- // parameter data (if any).
- //
- m_writer->emit("(void* entryPointParams, void* globalParams)");
- }
- else
- {
- // Non-compute shaders (currently just OptiX ray tracing kernels)
- // rely on other mechanisms for parameter passing, and thus use
- // an empty parameter list on the kernel declaration.
- //
- m_writer->emit("()");
- }
-
- emitSemantics(func);
- m_writer->emit("\n{\n");
- m_writer->indent();
-
- // Initialize when constructing so that globals are zeroed
- m_writer->emit("KernelContext context = {};\n");
-
- // The global-scope parameter data got passed in differently depending on whether we have
- // a compute shader or a ray-tracing shader, so we need to alter how we initialize
- // the pointer in our `context` based on the stage.
- //
- if( globalParams )
- {
- if( stage == Stage::Compute )
- {
- m_writer->emit("context.");
- m_writer->emit(getName(globalParams));
- m_writer->emit(" = (");
- emitType(globalParams->getDataType());
- m_writer->emit(")globalParams;\n");
- }
- else
- {
- m_writer->emit("context.");
- m_writer->emit(getName(globalParams));
- m_writer->emit(" = &SLANG_globalParams;\n");
- }
- }
-
- if (entryPointParams)
- {
- auto varDecl = entryPointParams;
- auto rawType = varDecl->getDataType();
- auto varType = rawType;
-
- m_writer->emit("context.");
- m_writer->emit(getName(varDecl));
- m_writer->emit(" = (");
- emitType(varType);
- m_writer->emit(")");
-
- // Similar to the case for global parameter data above, the entry-point
- // uniform parameter data gets passed in differently for compute kernels
- // vs. ray-tracing kernels, and we need to handle the two cases here.
- //
- if( stage == Stage::Compute )
- {
- // In the compute case, the entry-point uniform parameters came
- // in as an explicit parameter on the CUDA kernel, and we simply
- // cast it to the expected type here.
- //
- m_writer->emit("entryPointParams");
- }
- else
- {
- // In the ray-tracing case, the entry-point uniform parameters
- // implicitly map to the contents of the Shader Binding Table
- // (SBT) entry for the entry point instance being invoked.
- //
- // The OptiX API provides an accessor function to get a pointer
- // to the SBT data for the current entry, and we cast the result
- // of that to the expected type.
- //
- m_writer->emit("optixGetSbtDataPointer()");
- }
- m_writer->emit(";\n");
- }
-
- m_writer->emit("context.");
- m_writer->emit(funcName);
- m_writer->emit("();\n");
-
- m_writer->dedent();
- m_writer->emit("}\n");
- }
- }
- }
-
}
diff --git a/source/slang/slang-emit-cuda.h b/source/slang/slang-emit-cuda.h
index 9afd34c4b..ae78a5e57 100644
--- a/source/slang/slang-emit-cuda.h
+++ b/source/slang/slang-emit-cuda.h
@@ -56,7 +56,7 @@ protected:
virtual void emitVarDecorationsImpl(IRInst* varDecl) SLANG_OVERRIDE;
virtual void emitMatrixLayoutModifiersImpl(IRVarLayout* layout) SLANG_OVERRIDE;
virtual void emitCall(const HLSLIntrinsic* specOp, IRInst* inst, const IRUse* operands, int numOperands, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
- virtual void emitFunctionPreambleImpl(IRInst* inst) SLANG_OVERRIDE { SLANG_UNUSED(inst); m_writer->emit("__device__ "); }
+ virtual void emitFunctionPreambleImpl(IRInst* inst) SLANG_OVERRIDE;
virtual void emitLoopControlDecorationImpl(IRLoopControlDecoration* decl) SLANG_OVERRIDE;
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index d35acf4df..63bf8f054 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -9,6 +9,9 @@
#include "slang-ir-collect-global-uniforms.h"
#include "slang-ir-dce.h"
#include "slang-ir-entry-point-uniforms.h"
+#include "slang-ir-entry-point-raw-ptr-params.h"
+#include "slang-ir-explicit-global-context.h"
+#include "slang-ir-explicit-global-init.h"
#include "slang-ir-glsl-legalize.h"
#include "slang-ir-insts.h"
#include "slang-ir-legalize-varying-params.h"
@@ -255,11 +258,42 @@ Result linkAndOptimizeIR(
// parameters of a shader entry point and move them into
// the global scope instead.
//
- moveEntryPointUniformParamsToGlobalScope(irModule);
-#if 0
- dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS MOVED");
-#endif
- validateIRModuleIfEnabled(compileRequest, irModule);
+ // TODO: We should skip this step for CUDA targets.
+ //
+ {
+ CollectEntryPointUniformParamsOptions passOptions;
+ switch( target )
+ {
+ default:
+ break;
+
+ case CodeGenTarget::CPPSource:
+ case CodeGenTarget::CUDASource:
+ passOptions.alwaysCreateCollectedParam = true;
+ break;
+ }
+
+ collectEntryPointUniformParams(irModule, passOptions);
+ #if 0
+ dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS COLLECTED");
+ #endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+ }
+
+ switch( target )
+ {
+ default:
+ moveEntryPointUniformParamsToGlobalScope(irModule);
+ #if 0
+ dumpIRIfEnabled(compileRequest, irModule, "ENTRY POINT UNIFORMS MOVED");
+ #endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+ break;
+
+ case CodeGenTarget::CPPSource:
+ case CodeGenTarget::CUDASource:
+ break;
+ }
// Desguar any union types, since these will be illegal on
@@ -608,6 +642,24 @@ Result linkAndOptimizeIR(
break;
}
+
+ switch( target )
+ {
+ default:
+ break;
+
+ case CodeGenTarget::CPPSource:
+ case CodeGenTarget::CUDASource:
+ moveGlobalVarInitializationToEntryPoints(irModule);
+ introduceExplicitGlobalContext(irModule, target);
+ convertEntryPointPtrParamsToRawPtrs(irModule);
+ #if 0
+ dumpIRIfEnabled(compileRequest, irModule, "EXPLICIT GLOBAL CONTEXT INTRODUCED");
+ #endif
+ validateIRModuleIfEnabled(compileRequest, irModule);
+ break;
+ }
+
if (!compileRequest->allowDynamicCode)
{
// For all targets that don't support true dynamic dispatch through
diff --git a/source/slang/slang-ir-entry-point-raw-ptr-params.cpp b/source/slang/slang-ir-entry-point-raw-ptr-params.cpp
new file mode 100644
index 000000000..a9615a42a
--- /dev/null
+++ b/source/slang/slang-ir-entry-point-raw-ptr-params.cpp
@@ -0,0 +1,121 @@
+// slang-ir-entry-point-raw-ptr-params.cpp
+#include "slang-ir-entry-point-raw-ptr-params.h"
+
+#include "slang-ir-insts.h"
+
+namespace Slang
+{
+
+// This pass transforms the entry points in a module
+// so that any entry-point parameters of pointer
+// type (or a pointer-like type like `ConstantBuffer<T>`)
+// are replaced with parameters of raw pointer (`void*`)
+// type, with a cast in teh function body used to
+// produce a value of the expected type.
+
+struct ConvertEntryPointPtrParamsToRawPtrsPass
+{
+ IRModule* m_module;
+
+ void processModule()
+ {
+ SharedIRBuilder sharedBuilder(m_module);
+ IRBuilder builder(&sharedBuilder);
+
+ // We start by getting and caching the raw pointer type.
+ //
+ auto rawPtrType = builder.getRawPointerType();
+
+ // Now we loop over global-scope instructions searching
+ // for any entry points.
+ //
+ for( auto inst : m_module->getGlobalInsts() )
+ {
+ auto func = as<IRFunc>(inst);
+ if(!func)
+ continue;
+
+ if( !func->findDecoration<IREntryPointDecoration>() )
+ continue;
+
+ // We can only modify entry points with definitions here.
+ //
+ auto firstBlock = func->getFirstBlock();
+ if(!firstBlock)
+ continue;
+
+ // Any code we introduce for casts will need to be inserted
+ // before the first ordinary instruction in the first block
+ // of the function (right after the parameters).
+ //
+ builder.setInsertBefore(firstBlock->getFirstOrdinaryInst());
+
+ // Note: because we are inserting code right after the parameters
+ // it doesn't work here to use `firstBlock->getParams()`, because
+ // that captures a begin/end range where the "end" is the
+ // first ordinary instruction at the time of the call, which will
+ // chane when we insert code.
+ //
+ // TODO: We chould probably change the represnetation of ranges
+ // of instructions to use first/last instead of begin/end so
+ // that ranges are robust against changes to instructions outside
+ // of a range.
+ //
+ for( auto param = firstBlock->getFirstParam(); param; param = param->getNextParam() )
+ {
+ // We only want to transform parameters of pointer or
+ // pointer-like type.
+ //
+ auto paramType = param->getDataType();
+ if(!as<IRPtrTypeBase>(paramType) && !as<IRPointerLikeType>(paramType))
+ continue;
+
+ // We will overwrite the type of the parameter to
+ // be the raw pointer type instead.
+ //
+ builder.setDataType(param, rawPtrType);
+
+ // We are going to replace uses of the parameter with
+ // uses of a bit-cast operation based on the parameter,
+ // but we need to be careful because that bit-cast operation
+ // will itself be a use (which we don't want to replace
+ // because that would create a circularity).
+ //
+ // Instead we capture the list of uses *before* we create
+ // the bit cast instruction.
+ //
+ List<IRUse*> uses;
+ for(auto use = param->firstUse; use; use = use->nextUse)
+ uses.add(use);
+
+ // Now we emit a bit-cast operation into the first block
+ // of the entry-point function to cast the raw-pointer
+ // parameter to the type that the body code expects.
+ //
+ auto cast = builder.emitBitCast(paramType, param);
+
+ // Now we can replace all the (captured) uses of the
+ // parameter with the bit-cast operation instead.
+ //
+ for(auto use : uses)
+ use->set(cast);
+ }
+
+ // Because our operation might have changed the parameter
+ // types of the function, we need to make sure to fix up
+ // the IR type of the function to match its parameter list.
+ //
+ fixUpFuncType(func);
+ }
+ }
+};
+
+void convertEntryPointPtrParamsToRawPtrs(
+ IRModule* module)
+{
+ ConvertEntryPointPtrParamsToRawPtrsPass pass;
+ pass.m_module = module;
+ pass.processModule();
+}
+
+}
diff --git a/source/slang/slang-ir-entry-point-raw-ptr-params.h b/source/slang/slang-ir-entry-point-raw-ptr-params.h
new file mode 100644
index 000000000..6973b73f1
--- /dev/null
+++ b/source/slang/slang-ir-entry-point-raw-ptr-params.h
@@ -0,0 +1,12 @@
+// slang-ir-entry-point-raw-ptr-params.h
+#pragma once
+
+namespace Slang
+{
+struct IRModule;
+
+ /// Convert any entry-point parameters that use pointer types to use raw pointers (`void*`)
+void convertEntryPointPtrParamsToRawPtrs(
+ IRModule* module);
+
+}
diff --git a/source/slang/slang-ir-entry-point-uniforms.cpp b/source/slang/slang-ir-entry-point-uniforms.cpp
index 9c3c029a5..47e361d07 100644
--- a/source/slang/slang-ir-entry-point-uniforms.cpp
+++ b/source/slang/slang-ir-entry-point-uniforms.cpp
@@ -10,7 +10,7 @@ namespace Slang
{
-// The transformation in this file will solve the problem of taking
+// The transformations in this file will solve the problem of taking
// code like the following:
//
// float4 fragmentMain(
@@ -88,21 +88,103 @@ namespace Slang
// `params` above into individual variables for the `t` and
// `s` fields.
-// The overall structure here is similar to many other IR passes.
-// We define a "context" structure to encapsulate the pass.
+// For clarity and flexibility, the work is split across two
+// different IR passes:
//
-struct MoveEntryPointUniformParametersToGlobalScope
+// * The first pass simply collects together uniform parameters
+// into a single parameter of `struct` or `ConstantBuffer<...>` type.
+//
+// * The second pass transforms entry-point uniform parameters
+// into global shader parameters.
+
+// First we start with some helper subroutines for detecting
+// whether a parameter represents a varying input rather than
+// a uniform parameter.
+
+
+// In order to determine whether a parameter is varying based on its
+// layout, we need to know which resource kinds represent varying
+// shader parameters.
+//
+bool isVaryingResourceKind(LayoutResourceKind kind)
+{
+ switch( kind )
+ {
+ default:
+ return false;
+
+ // Note: The set of cases that are considered
+ // varying here would need to be extended if we
+ // add more fine-grained resource kinds (e.g.,
+ // if we ever add an explicit resource kind
+ // for geometry shader output streams).
+ //
+ // Ordinary varying input/output:
+ case LayoutResourceKind::VaryingInput:
+ case LayoutResourceKind::VaryingOutput:
+ //
+ // Ray-tracing shader input/output:
+ case LayoutResourceKind::CallablePayload:
+ case LayoutResourceKind::HitAttributes:
+ case LayoutResourceKind::RayPayload:
+ return true;
+ }
+}
+
+bool isVaryingParameter(IRTypeLayout* typeLayout)
+{
+ // If *any* of the resources consumed by the parameter type
+ // is *not* a varying resource kind, then we consider the
+ // whole parameter to be uniform (and thus not varying).
+ //
+ // Note that this means that an empty type will always
+ // be considered varying, even if it had been explicitly
+ // marked `uniform`.
+ //
+ // Note that this logic rules out support for parameters
+ // that mix varying and non-varying resource kinds.
+ //
+ // TODO: This whole convoluted definition exists because
+ // we currently don't give system-value parameters any
+ // reosurce kind, so they show up as empty. Simply
+ // adding `LayoutResourceKind`s for system-value inputs
+ // and outputs would allow for simpler logic here.
+ //
+ for(auto sizeAttr : typeLayout->getSizeAttrs())
+ {
+ if(!isVaryingResourceKind(sizeAttr->getResourceKind()))
+ return false;
+ }
+ return true;
+}
+
+bool isVaryingParameter(IRVarLayout* varLayout)
+{
+ return isVaryingParameter(varLayout->getTypeLayout());
+}
+
+// Our two passes have a fair amount in common in terms of
+// how they traverse the IR, so we will factor out the
+// shared logic into a base type.
+
+struct PerEntryPointPass
{
// We'll hang on to the module we are processing,
// so that we can refer to it when setting up `IRBuilder`s.
//
IRModule* module;
+
+ SharedIRBuilder* m_sharedBuilder = nullptr;
+
// We will process a whole module by visiting all
// its global functions, looking for entry points.
//
void processModule()
{
+ SharedIRBuilder sharedBuilder(module);
+ m_sharedBuilder = &sharedBuilder;
+
// Note that we are only looking at true global-scope
// functions and not functions nested inside of
// IR generics. When using generic entry points, this
@@ -130,21 +212,57 @@ struct MoveEntryPointUniformParametersToGlobalScope
if( !func->findDecorationImpl(kIROp_EntryPointDecoration) )
continue;
- // If we fine a candidate entry point, then we
+ // If we find a candidate entry point, then we
// will process it.
//
processEntryPoint(func);
}
}
- void processEntryPoint(IRFunc* func)
+ void processEntryPoint(IRFunc* entryPointFunc)
+ {
+ m_entryPointFunc = entryPointFunc;
+ processEntryPointImpl(entryPointFunc);
+ }
+
+ IRFunc* m_entryPointFunc = nullptr;
+
+ virtual void processEntryPointImpl(IRFunc* entryPointFunc) = 0;
+};
+
+
+struct CollectEntryPointUniformParams : PerEntryPointPass
+{
+ CollectEntryPointUniformParamsOptions m_options;
+
+ // *If* the entry point has any uniform parameter then we want to create a
+ // structure type to house them, and a single collected shader parameter (either
+ // an instance of that type or a constant buffer).
+ //
+ // We only want to create these if actually needed, so we will declare
+ // them here and then initialize them on-demand.
+ //
+ IRStructType* paramStructType = nullptr;
+ IRParam* collectedParam = nullptr;
+
+ IRVarLayout* entryPointParamsLayout = nullptr;
+ bool needConstantBuffer = false;
+
+ void processEntryPointImpl(IRFunc* entryPointFunc) SLANG_OVERRIDE
{
+ // This pass object may be used across multiple entry points,
+ // so we need to make sure to reset state that could have been
+ // left over from a previous entry point.
+ //
+ paramStructType = nullptr;
+ collectedParam = nullptr;
+
// We expect all entry points to have explicit layout information attached.
//
// We will assert that we have the information we need, but try to be
// defensive and bail out in the failure case in release builds.
//
- auto funcLayoutDecoration = func->findDecoration<IRLayoutDecoration>();
+ auto funcLayoutDecoration = entryPointFunc->findDecoration<IRLayoutDecoration>();
SLANG_ASSERT(funcLayoutDecoration);
if(!funcLayoutDecoration)
return;
@@ -161,31 +279,18 @@ struct MoveEntryPointUniformParametersToGlobalScope
// If we are in the latter case we will need to make sure to allocate
// an explicit IR constant buffer for that wrapper,
//
- auto entryPointParamsLayout = entryPointLayout->getParamsLayout();
- bool needConstantBuffer = as<IRParameterGroupTypeLayout>(entryPointParamsLayout->getTypeLayout()) != nullptr;
+ entryPointParamsLayout = entryPointLayout->getParamsLayout();
+ needConstantBuffer = as<IRParameterGroupTypeLayout>(entryPointParamsLayout->getTypeLayout()) != nullptr;
auto entryPointParamsStructLayout = getScopeStructLayout(entryPointLayout);
// We will set up an IR builder so that we are ready to generate code.
//
- SharedIRBuilder sharedBuilderStorage;
- auto sharedBuilder = &sharedBuilderStorage;
- sharedBuilder->module = module;
- sharedBuilder->session = module->getSession();
-
- IRBuilder builderStorage;
+ IRBuilder builderStorage(m_sharedBuilder);
auto builder = &builderStorage;
- builder->sharedBuilder = sharedBuilder;
- // *If* the entry point has any uniform parameter then we want to create a
- // structure type to house them, and a global shader parameter (either
- // an instance of that type or a constant buffer).
- //
- // We only want to create these if actually needed, so we will declare
- // them here and then initialize them on-demand.
- //
- IRStructType* paramStructType = nullptr;
- IRGlobalParam* globalParam = nullptr;
+ if(m_options.alwaysCreateCollectedParam)
+ ensureCollectedParamAndTypeHaveBeenCreated();
// We will be removing any uniform parameters we run into, so we
// need to iterate the parameter list carefully to deal with
@@ -193,7 +298,7 @@ struct MoveEntryPointUniformParametersToGlobalScope
//
IRParam* nextParam = nullptr;
UInt paramCounter = 0;
- for( IRParam* param = func->getFirstParam(); param; param = nextParam )
+ for( IRParam* param = entryPointFunc->getFirstParam(); param; param = nextParam )
{
nextParam = param->getNextParam();
UInt paramIndex = paramCounter++;
@@ -225,62 +330,9 @@ struct MoveEntryPointUniformParametersToGlobalScope
// to deal with creating the structure type and global shader
// parameter that our transformed entry point will use.
//
- if( !paramStructType )
- {
- // First we create the structure to hold the parameters.
- //
- builder->setInsertBefore(func);
- paramStructType = builder->createStructType();
- builder->addNameHintDecoration(paramStructType, UnownedTerminatedStringSlice("EntryPointParams"));
-
- if( needConstantBuffer )
- {
- // If we need a constant buffer, then the global
- // shader parameter will be a `ConstantBuffer<paramStructType>`
- //
- auto constantBufferType = builder->getConstantBufferType(paramStructType);
- globalParam = builder->createGlobalParam(constantBufferType);
- }
- else
- {
- // Otherwise, the global shader parameter is just
- // an instance of `paramStructType`.
- //
- globalParam = builder->createGlobalParam(paramStructType);
- }
-
- // No matter what, the global shader parameter should have the layout
- // information from the entry point attached to it, so that the
- // contained parameters will end up in the right place(s).
- //
- builder->addLayoutDecoration(globalParam, entryPointParamsLayout);
-
- // We add a name hint to the global parameter so that it will
- // emit to more readable code when referenced.
- //
- builder->addNameHintDecoration(globalParam, UnownedTerminatedStringSlice("entryPointParams"));
-
- // We also decorate the parameter for the entry-point parameters
- // so that we can find it again in downstream passes (like emit
- // for CPU/CUDA) that might want to treat entry-point parameters
- // different from other cases.
- //
- // TODO: Once we have support for multiple entry points to be emitted
- // at once, we need a way to associate these per-entry-point parameters
- // more closely with the original entry point. The two easiest options
- // are:
- //
- // 1. Don't move the new aggregate parameter to the global scope
- // on those targets, and instead keep it as a parameter of the
- // entry point.
- //
- // 2. Use a decoration on the entry point itself to point at the
- // global parameter for its per-entry-point parameter data.
- //
- builder->addDecoration(globalParam, kIROp_EntryPointParamDecoration);
- }
+ ensureCollectedParamAndTypeHaveBeenCreated();
- // Now that we've ensured the global `struct` type and shader paramter
+ // Now that we've ensured the global `struct` type and collected shader paramter
// exist, we need to add a field to the `struct` to represent the
// current parameter.
//
@@ -349,7 +401,7 @@ struct MoveEntryPointUniformParametersToGlobalScope
//
auto fieldAddress = builder->emitFieldAddress(
builder->getPtrType(paramType),
- globalParam,
+ collectedParam,
paramFieldKey);
fieldVal = builder->emitLoad(fieldAddress);
}
@@ -361,7 +413,7 @@ struct MoveEntryPointUniformParametersToGlobalScope
//
fieldVal = builder->emitFieldExtract(
paramType,
- globalParam,
+ collectedParam,
paramFieldKey);
}
@@ -380,76 +432,140 @@ struct MoveEntryPointUniformParametersToGlobalScope
param->removeAndDeallocate();
}
- fixUpFuncType(func);
+ if( collectedParam )
+ {
+ collectedParam->insertBefore(entryPointFunc->getFirstBlock()->getFirstChild());
+ }
+
+ fixUpFuncType(entryPointFunc);
}
- // We need to be able to determine if a parameter is logically
- // a "varying" parameter based on its layout.
- //
- bool isVaryingParameter(IRVarLayout* layout)
+ void ensureCollectedParamAndTypeHaveBeenCreated()
{
- // If *any* of the resources consumed by the parameter
- // is a varying resource kind (e.g., varying input) then
- // we consider the whole parameter to be varying.
- //
- // This is reasonable because there is no way to declare
- // a parameter that mixes varying and non-varying fields.
+ if(paramStructType)
+ return;
+
+ IRBuilder builder(m_sharedBuilder);
+
+ // First we create the structure to hold the parameters.
//
- for( auto resInfo : layout->getOffsetAttrs() )
+ builder.setInsertBefore(m_entryPointFunc);
+ paramStructType = builder.createStructType();
+ builder.addNameHintDecoration(paramStructType, UnownedTerminatedStringSlice("EntryPointParams"));
+
+ if( needConstantBuffer )
{
- if(isVaryingResourceKind(resInfo->getResourceKind()))
- return true;
+ // If we need a constant buffer, then the global
+ // shader parameter will be a `ConstantBuffer<paramStructType>`
+ //
+ auto constantBufferType = builder.getConstantBufferType(paramStructType);
+ collectedParam = builder.createParam(constantBufferType);
+ }
+ else
+ {
+ // Otherwise, the global shader parameter is just
+ // an instance of `paramStructType`.
+ //
+ collectedParam = builder.createParam(paramStructType);
}
- // TODO(JS): We probably want a more accurate way of determining if system semantic value
- // We can use the flags Flag::SemanticValue for one. But main issue with this test, is for some
- // targets currently (CPU) no resources are consumed. Perhaps this is fixed elsewhere by using a 'notional' resource.
-
- // Varying parameters with "system value" semantics currently show up as
- // consuming no resources, so we need to special-case that here.
- //
- // Note: an empty `struct` parameter would also show up the same way, but
- // we should eliminate any such parameters later on during type legalization.
+ // No matter what, the global shader parameter should have the layout
+ // information from the entry point attached to it, so that the
+ // contained parameters will end up in the right place(s).
//
- if(layout->getOffsetAttrs().getCount() == 0)
- return true;
+ builder.addLayoutDecoration(collectedParam, entryPointParamsLayout);
- // if none of the above tests determined that the
- // parameter was varying, then we can safely consider
- // it to be non-varying (uniform):
- return false;
+ // We add a name hint to the global parameter so that it will
+ // emit to more readable code when referenced.
+ //
+ builder.addNameHintDecoration(collectedParam, UnownedTerminatedStringSlice("entryPointParams"));
}
+};
- // In order to determine whether a parameter is varying based on its
- // layout, we need to know which resource kinds represent varying
- // shader parameters.
- //
- bool isVaryingResourceKind(LayoutResourceKind kind)
+struct MoveEntryPointUniformParametersToGlobalScope : PerEntryPointPass
+{
+ void processEntryPointImpl(IRFunc* entryPointFunc) SLANG_OVERRIDE
{
- switch( kind )
+ // We will set up an IR builder so that we are ready to generate code.
+ //
+ IRBuilder builderStorage(m_sharedBuilder);
+ auto builder = &builderStorage;
+
+ builder->setInsertBefore(entryPointFunc);
+
+ // We will be removing any uniform parameters we run into, so we
+ // need to iterate the parameter list carefully to deal with
+ // us modifying it along the way.
+ //
+ IRParam* nextParam = nullptr;
+ for( IRParam* param = entryPointFunc->getFirstParam(); param; param = nextParam )
{
- default:
- return false;
+ nextParam = param->getNextParam();
- // Note: The set of cases that are considered
- // varying here would need to be extended if we
- // add more fine-grained resource kinds (e.g.,
- // if we ever add an explicit resource kind
- // for geometry shader output streams).
+ // We expect all entry-point parameters to have layout information,
+ // but we will be defensive and skip parameters without the required
+ // information when we are in a release build.
+ //
+ auto layoutDecoration = param->findDecoration<IRLayoutDecoration>();
+ SLANG_ASSERT(layoutDecoration);
+ if(!layoutDecoration)
+ continue;
+ auto paramLayout = as<IRVarLayout>(layoutDecoration->getLayout());
+ SLANG_ASSERT(paramLayout);
+ if(!paramLayout)
+ continue;
+
+ // A parameter that has varying input/output behavior should be left alone,
+ // since this pass is only supposed to apply to uniform (non-varying)
+ // parameters.
+ //
+ if(isVaryingParameter(paramLayout))
+ continue;
+
+ auto paramType = param->getFullType();
+
+ builder->setInsertBefore(entryPointFunc);
+ auto globalParam = builder->createGlobalParam(paramType);
+
+ param->transferDecorationsTo(globalParam);
+
+ // We also decorate the parameter for the entry-point parameters
+ // so that we can find it again in downstream passes (like emit
+ // for CPU/CUDA) that might want to treat entry-point parameters
+ // different from other cases.
+ //
+ // TODO: Once we have support for multiple entry points to be emitted
+ // at once, we need a way to associate these per-entry-point parameters
+ // more closely with the original entry point. The two easiest options
+ // are:
+ //
+ // 1. Don't move the new aggregate parameter to the global scope
+ // on those targets, and instead keep it as a parameter of the
+ // entry point.
//
- // Ordinary varying input/output:
- case LayoutResourceKind::VaryingInput:
- case LayoutResourceKind::VaryingOutput:
+ // 2. Use a decoration on the entry point itself to point at the
+ // global parameter for its per-entry-point parameter data.
//
- // Ray-tracing shader input/output:
- case LayoutResourceKind::CallablePayload:
- case LayoutResourceKind::HitAttributes:
- case LayoutResourceKind::RayPayload:
- return true;
+ builder->addDecoration(globalParam, kIROp_EntryPointParamDecoration);
+
+ param->replaceUsesWith(globalParam);
+ param->removeAndDeallocate();
}
+
+ fixUpFuncType(entryPointFunc);
}
};
+void collectEntryPointUniformParams(
+ IRModule* module,
+ CollectEntryPointUniformParamsOptions const& options)
+{
+ CollectEntryPointUniformParams context;
+ context.module = module;
+ context.m_options = options;
+ context.processModule();
+}
+
void moveEntryPointUniformParamsToGlobalScope(
IRModule* module)
{
diff --git a/source/slang/slang-ir-entry-point-uniforms.h b/source/slang/slang-ir-entry-point-uniforms.h
index 3b6a0743b..c2c131d61 100644
--- a/source/slang/slang-ir-entry-point-uniforms.h
+++ b/source/slang/slang-ir-entry-point-uniforms.h
@@ -7,6 +7,16 @@ namespace Slang
{
struct IRModule;
+struct CollectEntryPointUniformParamsOptions
+{
+ bool alwaysCreateCollectedParam;
+};
+
+ /// Collect entry point uniform parameters into a wrapper `struct` and/or buffer
+void collectEntryPointUniformParams(
+ IRModule* module,
+ CollectEntryPointUniformParamsOptions const& options);
+
/// Move any uniform parameters of entry points to the global scope instead.
void moveEntryPointUniformParamsToGlobalScope(
IRModule* module);
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp
new file mode 100644
index 000000000..68f23461b
--- /dev/null
+++ b/source/slang/slang-ir-explicit-global-context.cpp
@@ -0,0 +1,523 @@
+// slang-ir-explicit-global-context.cpp
+#include "slang-ir-explicit-global-context.h"
+
+#include "slang-ir-insts.h"
+
+namespace Slang
+{
+
+// The job of this pass is take global-scope declarations
+// that are actually scoped to a single shader thread or
+// thread-group, and wrap them up in an explicit "context"
+// type that gets passed between functions.
+
+struct IntroduceExplicitGlobalContextPass
+{
+ IRModule* m_module = nullptr;
+ CodeGenTarget m_target = CodeGenTarget::Unknown;
+
+ SharedIRBuilder* m_sharedBuilder = nullptr;
+ IRStructType* m_contextStructType = nullptr;
+ IRPtrType* m_contextStructPtrType = nullptr;
+
+ IRGlobalParam* m_globalUniformsParam = nullptr;
+ List<IRGlobalVar*> m_globalVars;
+ List<IRFunc*> m_entryPoints;
+
+ void processModule()
+ {
+ SharedIRBuilder sharedBuilder(m_module);
+ m_sharedBuilder = &sharedBuilder;
+
+ IRBuilder builder(&sharedBuilder);
+
+ // The global context will be represneted by a `struct`
+ // type with a name hint of `KernelContext`.
+ //
+ m_contextStructType = builder.createStructType();
+ builder.addNameHintDecoration(m_contextStructType, UnownedTerminatedStringSlice("KernelContext"));
+
+ // The context will usually be passed around by pointer,
+ // so we get and cache that pointer type up front.
+ //
+ m_contextStructPtrType = builder.getPtrType(m_contextStructType);
+
+ // The transformation we will perform will need to affect
+ // global variables, global shader parameters, and entry-point
+ // function (at the very least), and we start with an explicit
+ // pass to collect these entities into explicit lists to simplify
+ // looping over them later.
+ //
+ for( auto inst : m_module->getGlobalInsts() )
+ {
+ switch( inst->op )
+ {
+ case kIROp_GlobalVar:
+ {
+ // A "global variable" in HLSL (and thus Slang) is actually
+ // a weird kind of thread-local variable, and so it cannot
+ // actually be lowered to a global variable on targets where
+ // globals behave like, well, globals.
+ //
+ auto globalVar = cast<IRGlobalVar>(inst);
+
+ // One important exception is that CUDA *does* support
+ // global variables with the `__shared__` qualifer, with
+ // semantics that exactly match HLSL/Slang `groupshared`.
+ //
+ // We thus need to skip processing of global variables
+ // that were marked `groupshared`. In our current IR,
+ // this is represented as a variable with the `@GroupShared`
+ // rate on its type.
+ //
+ if( m_target == CodeGenTarget::CUDASource )
+ {
+ if( as<IRGroupSharedRate>(globalVar->getRate()) )
+ continue;
+ }
+
+ m_globalVars.add(globalVar);
+ }
+ break;
+
+ case kIROp_GlobalParam:
+ {
+ // Global parameters are another HLSL/Slang concept
+ // that doesn't have a parallel in langauges like C/C++.
+ //
+ auto globalParam = cast<IRGlobalParam>(inst);
+
+
+ // One detail we need to be careful about is that as a result
+ // of legalizing the varying parameters of kernels, we can end
+ // up with global parameters for varying parameters on CUDA
+ // (e.g., to represent `threadIdx`. We thus skip any global-scope
+ // parameters that are varying instead of uniform.
+ //
+ auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>();
+ SLANG_ASSERT(layoutDecor);
+ auto layout = as<IRVarLayout>(layoutDecor->getLayout());
+ SLANG_ASSERT(layout);
+ if(isVaryingParameter(layout))
+ continue;
+
+ // Because of upstream passes, we expect there to be only a
+ // single global uniform parameter (at most).
+ //
+ // Note: If we ever changed out mind about the representation
+ // and wanted to support multiple global parameters, we could
+ // easily generalize this code to work with a list.
+ //
+ SLANG_ASSERT(!m_globalUniformsParam);
+ m_globalUniformsParam = globalParam;
+ }
+ break;
+
+ case kIROp_Func:
+ {
+ // Every entry point function is going to need to be modified,
+ // so that it can explicit create the context that other
+ // operations will use.
+
+ // We need to filter the IR functions to find only those
+ // that represent entry points.
+ //
+ auto func = cast<IRFunc>(inst);
+ if(!func->findDecoration<IREntryPointDecoration>())
+ continue;
+
+ m_entryPoints.add(func);
+ }
+ break;
+ }
+ }
+
+ // Now that we've capture all the relevant global entities from the IR,
+ // we can being to transform them in an appropriate order.
+ //
+ // The first step will be to create fields in the `KernelContext`
+ // type to represent any global parameters or global variables.
+ //
+ // The keys for the fields that are created will be remembered
+ // in a dictionary, so that we can find them later based on
+ // the global parameter/variable.
+ //
+ if( m_globalUniformsParam )
+ {
+ // For the parameter representing all the global uniform shader
+ // parameters, we create a field that exactly matches its type.
+ //
+ createContextStructField(m_globalUniformsParam, m_globalUniformsParam->getFullType());
+ }
+ for( auto globalVar : m_globalVars )
+ {
+ // A `IRGlobalVar` represents a pointer to where the variable is stored,
+ // so we need to create a field of the pointed-to type to represent it.
+ //
+ createContextStructField(globalVar, globalVar->getDataType()->getValueType());
+ }
+
+ // Once all the fields have been created, we can process the entry points.
+ //
+ // Each entry point will create a local `KernelContext` variable and
+ // initialize it based on the parameters passed to the entry point.
+ //
+ // The local variable introduced here will be registered as the representation
+ // of the context to be used in the body of the entry point.
+ //
+ for( auto entryPoint : m_entryPoints )
+ {
+ createContextForEntryPoint(entryPoint);
+ }
+
+ // Now that we've prepared all the entry points, we can make another
+ // pass over the global parameters/variables and start to replace
+ // their use sites with references to the fields of the context.
+ //
+ // Wherever a global parameter/variable is being referenced in a function,
+ // we will need to find or create a context value for that function
+ // to use. The context value for entry points has already been established
+ // above, but other functions will have an explicit context parameter
+ // added on demand.
+ //
+ if( m_globalUniformsParam )
+ {
+ replaceUsesOfGlobalParam(m_globalUniformsParam);
+ }
+ for( auto globalVar : m_globalVars )
+ {
+ replaceUsesOfGlobalVar(globalVar);
+ }
+ }
+
+ // As noted above, we will maintain mappings to record
+ // the key for the context field created for a global
+ // variable parameter, and to record the context pointer
+ // value to use for a function.
+ //
+ Dictionary<IRInst*, IRStructKey*> m_mapInstToContextFieldKey;
+ Dictionary<IRFunc*, IRInst*> m_mapFuncToContextPtr;
+
+ void createContextStructField(IRInst* originalInst, IRType* type)
+ {
+ // Creating a field in the context struct to represent
+ // `originalInst` is straightforward.
+
+ IRBuilder builder(m_sharedBuilder);
+ builder.setInsertBefore(m_contextStructType);
+
+ // We create a "key" for the new field, and then a field
+ // of the appropraite type.
+ //
+ auto key = builder.createStructKey();
+ auto field = builder.createStructField(m_contextStructType, key, type);
+
+ // If the original instruction had a name hint on it,
+ // then we transfer that name hint over to the key,
+ // so that the field will have the name of the former
+ // global variable/parameter.
+ //
+ if( auto nameHint = originalInst->findDecoration<IRNameHintDecoration>() )
+ {
+ nameHint->insertAtStart(key);
+ }
+
+ // Any other decorations on the original instruction
+ // (e.g., pertaining to layout) need to be transferred
+ // over to the field (not the key).
+ //
+ originalInst->transferDecorationsTo(field);
+
+ // We end by making note of the key that was created
+ // for the instruction, so that we can use the key
+ // to access the field later.
+ //
+ m_mapInstToContextFieldKey.Add(originalInst, key);
+ }
+
+ void createContextForEntryPoint(IRFunc* entryPointFunc)
+ {
+ // We can only introduce the explicit context into
+ // entry points that have definitions.
+ //
+ auto firstBlock = entryPointFunc->getFirstBlock();
+ if(!firstBlock)
+ return;
+
+ IRBuilder builder(m_sharedBuilder);
+
+ // The code we introduce will all be added to the start
+ // of the first block of the function.
+ //
+ auto firstOrdinary = firstBlock->getFirstOrdinaryInst();
+ builder.setInsertBefore(firstOrdinary);
+
+ // If there was a global-scope uniform parameter before,
+ // then we need to introduce an explicit parameter onto
+ // each entry-point function to represent it.
+ //
+ IRParam* globalUniformsParam = nullptr;
+ if( m_globalUniformsParam )
+ {
+ globalUniformsParam = builder.createParam(m_globalUniformsParam->getFullType());
+ if( auto nameHint = m_globalUniformsParam->findDecoration<IRNameHintDecoration>() )
+ {
+ builder.addNameHintDecoration(globalUniformsParam, nameHint->getNameOperand());
+ }
+
+ // The new parameter will be the last one in the
+ // parameter list of the entry point.
+ //
+ globalUniformsParam->insertBefore(firstOrdinary);
+ }
+
+ // The `KernelContext` to use inside the entry point
+ // will be a local variable declared in the first block.
+ //
+ auto contextVarPtr = builder.emitVar(m_contextStructType);
+ addKernelContextNameHint(contextVarPtr);
+ m_mapFuncToContextPtr.Add(entryPointFunc, contextVarPtr);
+
+ // If there is a global-scope uniform parameter, then
+ // we need to use our new explicit entry point parameter
+ // to inialize the corresponding field of the `KernelContext`
+ // before moving on with execution of the kernel body.
+ //
+ if(m_globalUniformsParam)
+ {
+ auto fieldKey = m_mapInstToContextFieldKey[m_globalUniformsParam];
+ auto fieldType = globalUniformsParam->getFullType();
+ auto fieldPtrType = builder.getPtrType(fieldType);
+
+ // We compute the addrress of the field and store the
+ // value of the parameter into it.
+ //
+ auto fieldPtr = builder.emitFieldAddress(fieldPtrType, contextVarPtr, fieldKey);
+ builder.emitStore(fieldPtr, globalUniformsParam);
+ }
+
+ // Note: at this point the `KernelContext` has additional
+ // fields for global variables that do not seem to have
+ // been initialized.
+ //
+ // Instead of making this pass take responsibility for initializing
+ // global variables, it is instead expected that clients will
+ // run the pass in `slang-ir-explicit-global-init` first,
+ // in order to move all initialization of globals into the
+ // entry point functions.
+ }
+
+ void replaceUsesOfGlobalParam(IRGlobalParam* globalParam)
+ {
+ IRBuilder builder(m_sharedBuilder);
+
+ // A global shader parameter was mapped to a field
+ // in the context structure, so we find the appropriate key.
+ //
+ auto key = m_mapInstToContextFieldKey[globalParam];
+
+ auto valType = globalParam->getFullType();
+ auto ptrType = builder.getPtrType(valType);
+
+ // We then iterate over the uses of the parameter,
+ // being careful to defend against the use/def information
+ // being changed while we walk it.
+ //
+ IRUse* nextUse = nullptr;
+ for( IRUse* use = globalParam->firstUse; use; use = nextUse )
+ {
+ nextUse = use->nextUse;
+
+ // At each use site, we need to look up the context
+ // pointer that is appropriate for that use.
+ //
+ auto user = use->getUser();
+ auto contextParam = findOrCreateContextPtrForInst(user);
+ builder.setInsertBefore(user);
+
+ // The value of the parameter can be produced by
+ // taking the address of the corresponding field
+ // in the context struct and loading from it.
+ //
+ auto ptr = builder.emitFieldAddress(ptrType, contextParam, key);
+ auto val = builder.emitLoad(valType, ptr);
+ use->set(val);
+ }
+ }
+
+ void replaceUsesOfGlobalVar(IRGlobalVar* globalVar)
+ {
+ IRBuilder builder(m_sharedBuilder);
+
+ // A global variable was mapped to a field
+ // in the context structure, so we find the appropriate key.
+ //
+ auto key = m_mapInstToContextFieldKey[globalVar];
+
+ auto ptrType = globalVar->getDataType();
+
+ // We then iterate over the uses of the variable,
+ // being careful to defend against the use/def information
+ // being changed while we walk it.
+ //
+ IRUse* nextUse = nullptr;
+ for( IRUse* use = globalVar->firstUse; use; use = nextUse )
+ {
+ nextUse = use->nextUse;
+
+ // At each use site, we need to look up the context
+ // pointer that is appropriate for that use.
+ //
+ auto user = use->getUser();
+ auto contextParam = findOrCreateContextPtrForInst(user);
+ builder.setInsertBefore(user);
+
+ // The address of the variable can be produced by
+ // taking the address of the corresponding field
+ // in the context struct.
+ //
+ auto ptr = builder.emitFieldAddress(ptrType, contextParam, key);
+ use->set(ptr);
+ }
+ }
+
+ IRInst* findOrCreateContextPtrForInst(IRInst* inst)
+ {
+ // When looking up the context pointer to use for
+ // an instruction, we need to find the enclosing
+ // function and use whatever context pointer it uses.
+ //
+ for( IRInst* i = inst; i; i = i->getParent() )
+ {
+ if( auto func = as<IRFunc>(i) )
+ {
+ return findOrCreateContextPtrForFunc(func);
+ }
+ }
+
+ // If a non-constant global entity is being referenced by
+ // something that is *not* nested under an IR function, then
+ // we are in trouble.
+ //
+ SLANG_UNEXPECTED("no outer func at use site for global");
+ UNREACHABLE_RETURN(nullptr);
+ }
+
+ IRInst* findOrCreateContextPtrForFunc(IRFunc* func)
+ {
+ // At this point we are being asked to either find or
+ // produce a context pointer for use inside `func`.
+ //
+ // If we already created such a pointer (perhaps because
+ // `func` is an entry point), then we are home free.
+ //
+ if( auto found = m_mapFuncToContextPtr.TryGetValue(func) )
+ {
+ return *found;
+ }
+
+ // Otherwise, we are going to need to introduce an
+ // explicit parameter to `func` to represent the
+ // context.
+ //
+ IRBuilder builder(m_sharedBuilder);
+
+ // We can safely assume that `func` has a body, because
+ // otherwise we wouldn't be getting a request for the
+ // context pointer value to use in its body.
+ //
+ auto firstBlock = func->getFirstBlock();
+ SLANG_ASSERT(firstBlock);
+
+ // We create a new parameter at the end of the parameter
+ // list for `func`, with a type of `KernelContext*`.
+ //
+ IRParam* contextParam = builder.createParam(m_contextStructPtrType);
+ addKernelContextNameHint(contextParam);
+ contextParam->insertBefore(firstBlock->getFirstOrdinaryInst());
+
+ // The new parameter can be registerd as the context value
+ // to be used for `func` right away.
+ //
+ // Note: we register the value *before* modifying locations
+ // that call `func` to protect against a possible infinite-recursion
+ // situation if `func` is recursive along some path.
+ //
+ m_mapFuncToContextPtr.Add(func, contextParam);
+
+ // Any code that calls `func` now needs to be updated to pass
+ // the context parameter.
+ //
+ // TODO: There is an issue here if `func` might be called
+ // dynamically, through something like a witness table.
+ //
+ List<IRUse*> uses;
+ for( auto use = func->firstUse; use; use = use->nextUse )
+ {
+ // We will only fix up calls to `func`, and ignore
+ // other operations that might refer to it.
+ //
+ // TODO: We need to allow things like decorations that might
+ // refer to `func`, but this logic is also going to
+ // ignore things like witness tables that refer to `func`,
+ // or operations that pass `func` as a function pointer
+ // to a higher-order function.
+ //
+ auto call = as<IRCall>(use->getUser());
+ if(!call)
+ continue;
+
+ // We are going to construct a new call to `func`
+ // that has all of the arguments of the original call...
+ //
+ UInt originalArgCount = call->getArgCount();
+ List<IRInst*> args;
+ for( UInt aa = 0; aa < originalArgCount; ++aa )
+ {
+ args.add(call->getArg(aa));
+ }
+
+ // ... plus an additional argument representing
+ // the context pointer at the call site (note that
+ // this step leads to a potential for recursion in this pass;
+ // the maximum depth of the recursion is bounded by the
+ // maximum length of a cycle-free path through the call
+ // graph of the program).
+ //
+ args.add(findOrCreateContextPtrForInst(call));
+
+ // The new call will be emitted right before the old one,
+ // then used to replace it.
+ //
+ builder.setInsertBefore(call);
+ auto newCall = builder.emitCallInst(call->getFullType(), call->getCallee(), args);
+ call->replaceUsesWith(newCall);
+ call->removeAndDeallocate();
+ }
+
+ return contextParam;
+ }
+
+ // Because we have multiple places where instructions representing
+ // the kernel context get introduced, we have factored out a subroutine
+ // for setting up the name hint to be used by those instructions.
+ //
+ void addKernelContextNameHint(IRInst* inst)
+ {
+ IRBuilder builder(m_sharedBuilder);
+ builder.addNameHintDecoration(inst, UnownedTerminatedStringSlice("kernelContext"));
+ }
+};
+
+ /// Collect global-scope variables/paramters to form an explicit context that gets threaded through
+void introduceExplicitGlobalContext(
+ IRModule* module,
+ CodeGenTarget target)
+{
+ IntroduceExplicitGlobalContextPass pass;
+ pass.m_module = module;
+ pass.m_target = target;
+ pass.processModule();
+}
+
+}
diff --git a/source/slang/slang-ir-explicit-global-context.h b/source/slang/slang-ir-explicit-global-context.h
new file mode 100644
index 000000000..521f3b76a
--- /dev/null
+++ b/source/slang/slang-ir-explicit-global-context.h
@@ -0,0 +1,15 @@
+// slang-ir-explicit-global-context.h
+#pragma once
+
+#include "slang-compiler.h"
+
+namespace Slang
+{
+struct IRModule;
+
+ /// Collect global-scope variables/paramters to form an explicit context that gets threaded through
+void introduceExplicitGlobalContext(
+ IRModule* module,
+ CodeGenTarget target);
+
+}
diff --git a/source/slang/slang-ir-explicit-global-init.cpp b/source/slang/slang-ir-explicit-global-init.cpp
new file mode 100644
index 000000000..07397902e
--- /dev/null
+++ b/source/slang/slang-ir-explicit-global-init.cpp
@@ -0,0 +1,207 @@
+// slang-ir-explicit-global-init.cpp
+#include "slang-ir-explicit-global-init.h"
+
+#include "slang-ir-insts.h"
+
+namespace Slang
+{
+
+// This pass is responsible for taking code in a form like:
+//
+// static int gCounter = 1;
+//
+// void computeMain()
+// {
+// ...
+// int tmp = gCounter++;
+// }
+//
+// and transforming it so that the initialization of global
+// variables is performed explicitly at the start of each
+// entry-point funciton:
+//
+// static int gCounter;
+//
+// void computeMain()
+// {
+// gCounter = 1;
+// ...
+// int tmp = gCounter++;
+// }
+//
+// Transforming the code in this way may be required for targets
+// that do not support initial-value expressions on global
+// variables (e.g., SPIR-V is such a target). It can also be
+// useful as a pre-process before other transformations that
+// might work with global variables, because after this change
+// there cannot be any global variables with initializers.
+
+struct MoveGlobalVarInitializationToEntryPointsPass
+{
+ IRModule* m_module;
+
+ SharedIRBuilder* m_sharedBuilder;
+
+ // In the Slang IR, a global variable represents a pointer
+ // to the storage for the variable but it *also* encodes
+ // the logic used to compute the initial value of that
+ // variable. This works because `IRGlobalVar` is a subtype
+ // of `IRGlobalValueWithCode`, which is also the base
+ // type of `IRFunc`. Thus a global variable behaves a
+ // bit like a function, which just happens to compute
+ // the initial value for the variable.
+ //
+ // Part of the work in this pass will be to split those
+ // two pars of the variable, so that we end up with
+ // a global variable with not initialization logic,
+ // plus an ordinary `IRFunc` to compute the initial
+ // value.
+ //
+ // We will compute this split representation and then
+ // hold onto it so that we can use it for injecting
+ // the initialization logic into entry points.
+ //
+ struct GlobalVarInfo
+ {
+ IRGlobalVar* globalVar = nullptr;
+ IRFunc* initFunc = nullptr;
+ };
+ List<GlobalVarInfo> m_globalVarsWithInit;
+
+ void processModule(IRModule* module)
+ {
+ m_module = module;
+
+ SharedIRBuilder sharedBuilder(module);
+ m_sharedBuilder = &sharedBuilder;
+
+ // We start by looking for global variables with
+ // initialization logic in the IR, and processing
+ // each to produce a split variable (now without
+ // initialization) and function (to compute the
+ // initial value).
+ //
+ for( auto inst : m_module->getGlobalInsts() )
+ {
+ auto globalVar = as<IRGlobalVar>(inst);
+ if(!globalVar)
+ continue;
+
+ auto firstBlock = globalVar->getFirstBlock();
+ if(!firstBlock)
+ continue;
+
+ processGlobalVarWithInit(globalVar, firstBlock);
+ }
+
+ // Then we loop over all the entry points in the
+ // module and modify them to explicitly initialize
+ // all the global variables that were identified
+ // and processed in the first pass.
+ //
+ for( auto inst : m_module->getGlobalInsts() )
+ {
+ auto func = as<IRFunc>(inst);
+ if(!func)
+ continue;
+
+ if(!func->findDecoration<IREntryPointDecoration>())
+ continue;
+
+ processEntryPoint(func);
+ }
+ }
+
+ void processGlobalVarWithInit(IRGlobalVar* globalVar, IRBlock* firstBlock)
+ {
+ IRBuilder builder(m_sharedBuilder);
+ builder.setInsertBefore(globalVar);
+
+ // Becaue an `IRGlobalVar` reprsents a pointer to the storage
+ // for the variable, we need to extract the underlying value
+ // type from the pointer type.
+ //
+ auto valueType = globalVar->getDataType()->getValueType();
+
+ // We are going to construct an explicit IR function to compute
+ // the initial value of the variable. That function will alway
+ // take zero parameters.
+ //
+ auto initFunc = builder.createFunc();
+ initFunc->setFullType(builder.getFuncType(0, nullptr, valueType));
+
+ // The basic blocks under teh `IRGlobalVar` define its initialization
+ // logic, and we can simply move those blocks over to the new
+ // `IRFunc` to define its behavior.
+ //
+ // As a result, the `globalVar` will no longer have its own
+ // initialization logic, which is a postcondition this pass
+ // needed to guarantee.
+ //
+ IRBlock* nextBlock = nullptr;
+ for( IRBlock* block = firstBlock; block; block = nextBlock )
+ {
+ nextBlock = block->getNextBlock();
+
+ block->removeFromParent();
+ block->insertAtEnd(initFunc);
+ }
+
+ // We need to remember the variable and the assocaited
+ // initial-value function so that we can iterate over
+ // them in the per-entry-point logic below.
+ //
+ GlobalVarInfo info;
+ info.globalVar = globalVar;
+ info.initFunc = initFunc;
+ m_globalVarsWithInit.add(info);
+ }
+
+ void processEntryPoint(IRFunc* entryPointFunc)
+ {
+ // We can only process entry point definitions, not declarations.
+ //
+ auto firstBlock = entryPointFunc->getFirstBlock();
+ if(!firstBlock)
+ return;
+
+ // We are going to insert initiailization logic at the start
+ // of the first block of the entry point.
+ //
+ IRBuilder builder(m_sharedBuilder);
+ builder.setInsertBefore(firstBlock->getFirstOrdinaryInst());
+
+ for( auto globalVarInfo : m_globalVarsWithInit )
+ {
+ // The earlier step split each global variable into
+ // a variable with no initialization logic, plus a function
+ // that can be called to compute the initial value.
+ //
+ auto globalVar = globalVarInfo.globalVar;
+ auto initFunc = globalVarInfo.initFunc;
+
+ // Because the `IRGlobalVar` represents a pointer to
+ // storage, we need to get the pointed-to type to
+ // get the type of the initial value.
+ //
+ auto valType = globalVar->getDataType()->getValueType();
+
+ // We compute the initial value for the variable by calling
+ // the initial-value function with no arguments, and then
+ // we store that value into the corresponding global.
+ //
+ auto initVal = builder.emitCallInst(valType, initFunc, 0, nullptr);
+ builder.emitStore(globalVar, initVal);
+ }
+ }
+};
+
+ /// Move initialization logic off of global variables and onto each entry point
+void moveGlobalVarInitializationToEntryPoints(
+ IRModule* module)
+{
+ MoveGlobalVarInitializationToEntryPointsPass pass;
+ pass.processModule(module);
+}
+
+}
diff --git a/source/slang/slang-ir-explicit-global-init.h b/source/slang/slang-ir-explicit-global-init.h
new file mode 100644
index 000000000..fb10bf1e5
--- /dev/null
+++ b/source/slang/slang-ir-explicit-global-init.h
@@ -0,0 +1,11 @@
+// slang-ir-explicit-global-init.h
+#pragma once
+
+namespace Slang
+{
+struct IRModule;
+
+ /// Move initialization logic off of global variables and onto each entry point
+void moveGlobalVarInitializationToEntryPoints(
+ IRModule* module);
+}
diff --git a/source/slang/slang-ir-insts.h b/source/slang/slang-ir-insts.h
index a7dd1355a..745cc6b02 100644
--- a/source/slang/slang-ir-insts.h
+++ b/source/slang/slang-ir-insts.h
@@ -1124,6 +1124,10 @@ struct IRVarLayout : IRLayout
};
};
+bool isVaryingResourceKind(LayoutResourceKind kind);
+bool isVaryingParameter(IRTypeLayout* typeLayout);
+bool isVaryingParameter(IRVarLayout* varLayout);
+
/// Associate layout information with an instruction.
///
/// This decoration is used in three main ways:
@@ -1402,6 +1406,8 @@ struct IRVar : IRInst
/// blocks nested inside this value.
struct IRGlobalVar : IRGlobalValueWithCode
{
+ IR_LEAF_ISA(GlobalVar)
+
IRPtrType* getDataType()
{
return cast<IRPtrType>(IRInst::getDataType());
diff --git a/source/slang/slang-ir-legalize-varying-params.cpp b/source/slang/slang-ir-legalize-varying-params.cpp
index 5772e79f9..3df651e6a 100644
--- a/source/slang/slang-ir-legalize-varying-params.cpp
+++ b/source/slang/slang-ir-legalize-varying-params.cpp
@@ -406,7 +406,8 @@ protected:
m_paramLayout = as<IRVarLayout>(paramLayoutDecoration->getLayout());
SLANG_ASSERT(m_paramLayout);
- // TODO: We need to detect and skip parameters here that are not varying.
+ if(!isVaryingParameter(m_paramLayout))
+ return;
// TODO: The GLSL-specific variant of this pass has several
// special cases that handle entry-point parameters for things like
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index d6138f2af..b01b275f9 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -1465,6 +1465,35 @@ bool isKhronosTarget(TargetRequest* targetReq)
}
}
+bool isCPUTarget(TargetRequest* targetReq)
+{
+ switch( targetReq->getTarget() )
+ {
+ default:
+ return false;
+
+ case CodeGenTarget::CPPSource:
+ case CodeGenTarget::CSource:
+ case CodeGenTarget::HostCallable:
+ case CodeGenTarget::Executable:
+ case CodeGenTarget::SharedLibrary:
+ return true;
+ }
+}
+
+bool isCUDATarget(TargetRequest* targetReq)
+{
+ switch( targetReq->getTarget() )
+ {
+ default:
+ return false;
+
+ case CodeGenTarget::CUDASource:
+ case CodeGenTarget::PTX:
+ return true;
+ }
+}
+
static bool isD3D11Target(TargetRequest*)
{
// We aren't officially supporting D3D11 right now
@@ -1950,8 +1979,20 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout(
// can't retroactively change whether or not `U` needed
// a constant buffer).
//
+ // Note: On CUDA and CPU targets, where we have true pointers,
+ // we always want to create an actual indirection for a parameter
+ // group, since otherwise the layout of a constant buffer would
+ // depend on its contents (in particular, whether or not
+ // the contents are empty).
+ //
+ // TODO: there is a subroutine arleady that tries to determine
+ // if a wrapping constant buffer is needed based on an element
+ // type and layout context; we should be using that here.
+ //
bool wantConstantBuffer = _usesOrdinaryData(rawElementTypeLayout)
- || _usesExistentialData(rawElementTypeLayout);
+ || _usesExistentialData(rawElementTypeLayout)
+ || isCUDATarget(context.targetReq)
+ || isCPUTarget(context.targetReq);
if( wantConstantBuffer )
{
// If there is any ordinary data, then we'll need to
@@ -2282,7 +2323,9 @@ static RefPtr<TypeLayout> _createParameterGroupTypeLayout(
}
/// Do we need to wrap the given element type in a constant buffer layout?
-static bool needsConstantBuffer(RefPtr<TypeLayout> elementTypeLayout)
+static bool needsConstantBuffer(
+ TypeLayoutContext const& context,
+ RefPtr<TypeLayout> elementTypeLayout)
{
// We need a constant buffer if the element type has ordinary/uniform data.
//
@@ -2298,6 +2341,14 @@ static bool needsConstantBuffer(RefPtr<TypeLayout> elementTypeLayout)
return true;
}
+ // Finally, on certain targets we always want to create
+ // wrapper constant buffer layouts, even if there is no
+ // data whatsoever.
+ //
+ auto targetReq = context.targetReq;
+ if( isCPUTarget(targetReq) || isCUDATarget(targetReq) )
+ return true;
+
return false;
}
@@ -2309,7 +2360,7 @@ RefPtr<TypeLayout> createConstantBufferTypeLayoutIfNeeded(
// we are trying to lay out even needs a constant buffer allocated
// for it.
//
- if(!needsConstantBuffer(elementTypeLayout))
+ if(!needsConstantBuffer(context, elementTypeLayout))
return elementTypeLayout;
auto parameterGroupRules = context.getRulesFamily()->getConstantBufferRules();
diff --git a/source/slang/slang.vcxproj b/source/slang/slang.vcxproj
index d9c15fd23..d23dccabf 100644
--- a/source/slang/slang.vcxproj
+++ b/source/slang/slang.vcxproj
@@ -227,7 +227,10 @@
<ClInclude Include="slang-ir-constexpr.h" />
<ClInclude Include="slang-ir-dce.h" />
<ClInclude Include="slang-ir-dominators.h" />
+ <ClInclude Include="slang-ir-entry-point-raw-ptr-params.h" />
<ClInclude Include="slang-ir-entry-point-uniforms.h" />
+ <ClInclude Include="slang-ir-explicit-global-context.h" />
+ <ClInclude Include="slang-ir-explicit-global-init.h" />
<ClInclude Include="slang-ir-glsl-legalize.h" />
<ClInclude Include="slang-ir-inline.h" />
<ClInclude Include="slang-ir-inst-defs.h" />
@@ -319,7 +322,10 @@
<ClCompile Include="slang-ir-constexpr.cpp" />
<ClCompile Include="slang-ir-dce.cpp" />
<ClCompile Include="slang-ir-dominators.cpp" />
+ <ClCompile Include="slang-ir-entry-point-raw-ptr-params.cpp" />
<ClCompile Include="slang-ir-entry-point-uniforms.cpp" />
+ <ClCompile Include="slang-ir-explicit-global-context.cpp" />
+ <ClCompile Include="slang-ir-explicit-global-init.cpp" />
<ClCompile Include="slang-ir-glsl-legalize.cpp" />
<ClCompile Include="slang-ir-inline.cpp" />
<ClCompile Include="slang-ir-layout.cpp" />
@@ -420,4 +426,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
-</Project> \ No newline at end of file
+</Project> \ No newline at end of file
diff --git a/source/slang/slang.vcxproj.filters b/source/slang/slang.vcxproj.filters
index 19a571e0b..f32e911b5 100644
--- a/source/slang/slang.vcxproj.filters
+++ b/source/slang/slang.vcxproj.filters
@@ -132,9 +132,18 @@
<ClInclude Include="slang-ir-dominators.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="slang-ir-entry-point-raw-ptr-params.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="slang-ir-entry-point-uniforms.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="slang-ir-explicit-global-context.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="slang-ir-explicit-global-init.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="slang-ir-glsl-legalize.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -404,9 +413,18 @@
<ClCompile Include="slang-ir-dominators.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="slang-ir-entry-point-raw-ptr-params.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="slang-ir-entry-point-uniforms.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="slang-ir-explicit-global-context.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="slang-ir-explicit-global-init.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="slang-ir-glsl-legalize.cpp">
<Filter>Source Files</Filter>
</ClCompile>