summaryrefslogtreecommitdiffstats
path: root/source/slang/slang-ir-explicit-global-context.cpp
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2024-05-06 14:53:27 -0700
committerGitHub <noreply@github.com>2024-05-06 14:53:27 -0700
commit618428a87b8295347288262ea13eff63cc62aa56 (patch)
treea602766742a437c3f0577ae6e3c1077c1b4e082c /source/slang/slang-ir-explicit-global-context.cpp
parent2220d26c24b259075182672be22e1494b4affc95 (diff)
Delete `wrap-global-context` pass. (#4114)
* Delete `wrap-global-context` pass. The pass was added for the metal backend without realizing that the existing `explicit-global-context` does 99% of the job. Instead of duplicating the logic in a different pass for metal, we extend same explicit-global-context pass to work for metal. * Fix build.
Diffstat (limited to 'source/slang/slang-ir-explicit-global-context.cpp')
-rw-r--r--source/slang/slang-ir-explicit-global-context.cpp103
1 files changed, 53 insertions, 50 deletions
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp
index ab3a8bb51..f63ceb71e 100644
--- a/source/slang/slang-ir-explicit-global-context.cpp
+++ b/source/slang/slang-ir-explicit-global-context.cpp
@@ -2,6 +2,7 @@
#include "slang-ir-explicit-global-context.h"
#include "slang-ir-insts.h"
+#include "slang-ir-clone.h"
namespace Slang
{
@@ -19,7 +20,7 @@ struct IntroduceExplicitGlobalContextPass
IRStructType* m_contextStructType = nullptr;
IRPtrType* m_contextStructPtrType = nullptr;
- IRGlobalParam* m_globalUniformsParam = nullptr;
+ List<IRGlobalParam*> m_globalParams;
List<IRGlobalVar*> m_globalVars;
List<IRFunc*> m_entryPoints;
@@ -80,17 +81,26 @@ struct IntroduceExplicitGlobalContextPass
// One detail we need to be careful about is that as a result
- // of legalizing the varying parameters of kernels, we can end
- // up with global parameters for varying parameters on CUDA
- // (e.g., to represent `threadIdx`. We thus skip any global-scope
- // parameters that are varying instead of uniform.
+ // of legalizing the varying parameters of compute kernels to
+ // CPU or CUDA, we can end up with global parameters for varying
+ // parameters on CUDA (e.g., to represent `threadIdx`. We thus
+ // skip any global-scope parameters that are varying instead of
+ // uniform.
//
- auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>();
- SLANG_ASSERT(layoutDecor);
- auto layout = as<IRVarLayout>(layoutDecor->getLayout());
- SLANG_ASSERT(layout);
- if(isVaryingParameter(layout))
- continue;
+ switch (m_target)
+ {
+ case CodeGenTarget::CUDASource:
+ case CodeGenTarget::CPPSource:
+ {
+ auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>();
+ SLANG_ASSERT(layoutDecor);
+ auto layout = as<IRVarLayout>(layoutDecor->getLayout());
+ SLANG_ASSERT(layout);
+ if (isVaryingParameter(layout))
+ continue;
+ }
+ break;
+ }
// Because of upstream passes, we expect there to be only a
// single global uniform parameter (at most).
@@ -105,8 +115,7 @@ struct IntroduceExplicitGlobalContextPass
if(m_target == CodeGenTarget::CUDASource)
continue;
- SLANG_ASSERT(!m_globalUniformsParam);
- m_globalUniformsParam = globalParam;
+ m_globalParams.add(globalParam);
}
break;
@@ -130,15 +139,15 @@ struct IntroduceExplicitGlobalContextPass
}
// If there are no global-scope entities that require processing,
- // then we can completely skip the work of this pass for CUDA.
+ // then we can completely skip the work of this pass for CUDA/Metal.
//
// Note: We cannot skip the rest of the pass for CPU, because
// it is responsible for introducing the explicit entry-point
// parameter that is used for passing in the global param(s).
//
- if( m_target == CodeGenTarget::CUDASource )
+ if( m_target != CodeGenTarget::CPPSource )
{
- if( !m_globalUniformsParam && (m_globalVars.getCount() == 0) )
+ if (m_globalParams.getCount() == 0 && m_globalVars.getCount() == 0)
{
return;
}
@@ -156,7 +165,7 @@ struct IntroduceExplicitGlobalContextPass
// The context will usually be passed around by pointer,
// so we get and cache that pointer type up front.
//
- m_contextStructPtrType = builder.getPtrType(m_contextStructType);
+ m_contextStructPtrType = builder.getPtrType(kIROp_PtrType, m_contextStructType, (IRIntegerValue)AddressSpace::ThreadLocal);
// The first step will be to create fields in the `KernelContext`
@@ -166,12 +175,13 @@ struct IntroduceExplicitGlobalContextPass
// in a dictionary, so that we can find them later based on
// the global parameter/variable.
//
- if( m_globalUniformsParam )
+ for (auto globalParam : m_globalParams)
{
// For the parameter representing all the global uniform shader
// parameters, we create a field that exactly matches its type.
//
- createContextStructField(m_globalUniformsParam, m_globalUniformsParam->getFullType());
+
+ createContextStructField(globalParam, globalParam->getFullType());
}
for( auto globalVar : m_globalVars )
{
@@ -204,9 +214,9 @@ struct IntroduceExplicitGlobalContextPass
// above, but other functions will have an explicit context parameter
// added on demand.
//
- if( m_globalUniformsParam )
+ for (auto globalParam : m_globalParams)
{
- replaceUsesOfGlobalParam(m_globalUniformsParam);
+ replaceUsesOfGlobalParam(globalParam);
}
for( auto globalVar : m_globalVars )
{
@@ -234,23 +244,11 @@ struct IntroduceExplicitGlobalContextPass
// of the appropraite type.
//
auto key = builder.createStructKey();
- auto field = builder.createStructField(m_contextStructType, key, type);
+ builder.createStructField(m_contextStructType, key, type);
- // If the original instruction had a name hint on it,
- // then we transfer that name hint over to the key,
- // so that the field will have the name of the former
- // global variable/parameter.
- //
- if( auto nameHint = originalInst->findDecoration<IRNameHintDecoration>() )
- {
- nameHint->insertAtStart(key);
- }
-
- // Any other decorations on the original instruction
- // (e.g., pertaining to layout) need to be transferred
- // over to the field (not the key).
- //
- originalInst->transferDecorationsTo(field);
+ // Clone all original decorations to the new struct key.
+ IRCloneEnv cloneEnv;
+ cloneInstDecorationsAndChildren(&cloneEnv, m_module, originalInst, key);
// We end by making note of the key that was created
// for the instruction, so that we can use the key
@@ -280,21 +278,26 @@ struct IntroduceExplicitGlobalContextPass
// then we need to introduce an explicit parameter onto
// each entry-point function to represent it.
//
- IRParam* globalUniformsParam = nullptr;
- if( m_globalUniformsParam )
+ struct GlobalParamInfo
{
- globalUniformsParam = builder.createParam(m_globalUniformsParam->getFullType());
- if( auto nameHint = m_globalUniformsParam->findDecoration<IRNameHintDecoration>() )
- {
- builder.addNameHintDecoration(globalUniformsParam, nameHint->getNameOperand());
- }
+ IRGlobalParam* globalParam;
+ IRParam* entryPointParam;
+ };
+ List<GlobalParamInfo> entryPointParams;
+ for (auto globalParam : m_globalParams)
+ {
+ auto entryPointParam = builder.createParam(globalParam->getFullType());
+ IRCloneEnv cloneEnv;
+ cloneInstDecorationsAndChildren(&cloneEnv, m_module, globalParam, entryPointParam);
+ entryPointParams.add({globalParam, entryPointParam});
// The new parameter will be the last one in the
// parameter list of the entry point.
//
- globalUniformsParam->insertBefore(firstOrdinary);
+ entryPointParam->insertBefore(firstOrdinary);
}
- else if(m_target == CodeGenTarget::CPPSource)
+
+ if (m_target == CodeGenTarget::CPPSource && m_globalParams.getCount() == 0)
{
// The nature of our current ABI for entry points on CPU
// means that we need an explicit parameter to be *declared*
@@ -316,17 +319,17 @@ struct IntroduceExplicitGlobalContextPass
// to inialize the corresponding field of the `KernelContext`
// before moving on with execution of the kernel body.
//
- if(m_globalUniformsParam)
+ for (auto entryPointParam : entryPointParams)
{
- auto fieldKey = m_mapInstToContextFieldKey[m_globalUniformsParam];
- auto fieldType = globalUniformsParam->getFullType();
+ auto fieldKey = m_mapInstToContextFieldKey[entryPointParam.globalParam];
+ auto fieldType = entryPointParam.globalParam->getFullType();
auto fieldPtrType = builder.getPtrType(fieldType);
// We compute the addrress of the field and store the
// value of the parameter into it.
//
auto fieldPtr = builder.emitFieldAddress(fieldPtrType, contextVarPtr, fieldKey);
- builder.emitStore(fieldPtr, globalUniformsParam);
+ builder.emitStore(fieldPtr, entryPointParam.entryPointParam);
}
// Note: at this point the `KernelContext` has additional