summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yong He <yonghe@outlook.com> 2024-05-06 14:53:27 -0700
committer GitHub <noreply@github.com> 2024-05-06 14:53:27 -0700
commit 618428a87b8295347288262ea13eff63cc62aa56 (patch)
tree a602766742a437c3f0577ae6e3c1077c1b4e082c
parent 2220d26c24b259075182672be22e1494b4affc95 (diff)
Delete `wrap-global-context` pass. (#4114)
* Delete `wrap-global-context` pass. The pass was added for the metal backend without realizing that the existing `explicit-global-context` does 99% of the job. Instead of duplicating the logic in a different pass for metal, we extend the same explicit-global-context pass to work for metal. * Fix build.
-rw-r--r-- build/visual-studio/slang/slang.vcxproj 2
-rw-r--r-- build/visual-studio/slang/slang.vcxproj.filters 6
-rw-r--r-- source/slang/slang-emit.cpp 9
-rw-r--r-- source/slang/slang-ir-explicit-global-context.cpp 103
-rw-r--r-- source/slang/slang-ir-wrap-global-context.cpp 286
-rw-r--r-- source/slang/slang-ir-wrap-global-context.h 14
6 files changed, 54 insertions, 366 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj
index f5e1c4c5d..ee281252d 100644
--- a/build/visual-studio/slang/slang.vcxproj
+++ b/build/visual-studio/slang/slang.vcxproj
@@ -487,7 +487,6 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
<ClInclude Include="..\..\..\source\slang\slang-ir-variable-scope-correction.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-vk-invert-y.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.h" />
- <ClInclude Include="..\..\..\source\slang\slang-ir-wrap-global-context.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir.h" />
<ClInclude Include="..\..\..\source\slang\slang-language-server-ast-lookup.h" />
@@ -720,7 +719,6 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla
<ClCompile Include="..\..\..\source\slang\slang-ir-variable-scope-correction.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-vk-invert-y.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.cpp" />
- <ClCompile Include="..\..\..\source\slang\slang-ir-wrap-global-context.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-language-server-ast-lookup.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters
index 8e33fcfbb..f87a64ab1 100644
--- a/build/visual-studio/slang/slang.vcxproj.filters
+++ b/build/visual-studio/slang/slang.vcxproj.filters
@@ -549,9 +549,6 @@
<ClInclude Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.h">
<Filter>Header Files</Filter>
</ClInclude>
- <ClInclude Include="..\..\..\source\slang\slang-ir-wrap-global-context.h">
- <Filter>Header Files</Filter>
- </ClInclude>
<ClInclude Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -1244,9 +1241,6 @@
<ClCompile Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
- <ClCompile Include="..\..\..\source\slang\slang-ir-wrap-global-context.cpp">
- <Filter>Source Files</Filter>
- </ClCompile>
<ClCompile Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp
index afdd37fce..39ebaa64d 100644
--- a/source/slang/slang-emit.cpp
+++ b/source/slang/slang-emit.cpp
@@ -70,7 +70,6 @@
#include "slang-ir-synthesize-active-mask.h"
#include "slang-ir-validate.h"
#include "slang-ir-wrap-structured-buffers.h"
-#include "slang-ir-wrap-global-context.h"
#include "slang-ir-liveness.h"
#include "slang-ir-glsl-liveness.h"
#include "slang-ir-translate-glsl-global-var.h"
@@ -886,9 +885,9 @@ Result linkAndOptimizeIR(
case CodeGenTarget::GLSL:
case CodeGenTarget::SPIRV:
case CodeGenTarget::SPIRVAssembly:
- case CodeGenTarget::Metal:
moveGlobalVarInitializationToEntryPoints(irModule);
break;
+ case CodeGenTarget::Metal:
case CodeGenTarget::CPPSource:
case CodeGenTarget::CUDASource:
moveGlobalVarInitializationToEntryPoints(irModule);
@@ -1097,12 +1096,6 @@ Result linkAndOptimizeIR(
validateIRModuleIfEnabled(codeGenContext, irModule);
}
- // Metal does not allow global variables and global parameters, so
- // we need to convert them into an explicit global context parameter
- // passed around through a function parameter.
- if (target == CodeGenTarget::Metal)
- wrapGlobalScopeInContextType(irModule);
-
auto metadata = new ArtifactPostEmitMetadata;
outLinkedIR.metadata = metadata;
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp
index ab3a8bb51..f63ceb71e 100644
--- a/source/slang/slang-ir-explicit-global-context.cpp
+++ b/source/slang/slang-ir-explicit-global-context.cpp
@@ -2,6 +2,7 @@
#include "slang-ir-explicit-global-context.h"
#include "slang-ir-insts.h"
+#include "slang-ir-clone.h"
namespace Slang
{
@@ -19,7 +20,7 @@ struct IntroduceExplicitGlobalContextPass
IRStructType* m_contextStructType = nullptr;
IRPtrType* m_contextStructPtrType = nullptr;
- IRGlobalParam* m_globalUniformsParam = nullptr;
+ List<IRGlobalParam*> m_globalParams;
List<IRGlobalVar*> m_globalVars;
List<IRFunc*> m_entryPoints;
@@ -80,17 +81,26 @@ struct IntroduceExplicitGlobalContextPass
// One detail we need to be careful about is that as a result
- // of legalizing the varying parameters of kernels, we can end
- // up with global parameters for varying parameters on CUDA
- // (e.g., to represent `threadIdx`. We thus skip any global-scope
- // parameters that are varying instead of uniform.
+ // of legalizing the varying parameters of compute kernels to
+ // CPU or CUDA, we can end up with global parameters for varying
+ // parameters on CUDA (e.g., to represent `threadIdx`). We thus
+ // skip any global-scope parameters that are varying instead of
+ // uniform.
//
- auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>();
- SLANG_ASSERT(layoutDecor);
- auto layout = as<IRVarLayout>(layoutDecor->getLayout());
- SLANG_ASSERT(layout);
- if(isVaryingParameter(layout))
- continue;
+ switch (m_target)
+ {
+ case CodeGenTarget::CUDASource:
+ case CodeGenTarget::CPPSource:
+ {
+ auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>();
+ SLANG_ASSERT(layoutDecor);
+ auto layout = as<IRVarLayout>(layoutDecor->getLayout());
+ SLANG_ASSERT(layout);
+ if (isVaryingParameter(layout))
+ continue;
+ }
+ break;
+ }
// Because of upstream passes, we expect there to be only a
// single global uniform parameter (at most).
@@ -105,8 +115,7 @@ struct IntroduceExplicitGlobalContextPass
if(m_target == CodeGenTarget::CUDASource)
continue;
- SLANG_ASSERT(!m_globalUniformsParam);
- m_globalUniformsParam = globalParam;
+ m_globalParams.add(globalParam);
}
break;
@@ -130,15 +139,15 @@ struct IntroduceExplicitGlobalContextPass
}
// If there are no global-scope entities that require processing,
- // then we can completely skip the work of this pass for CUDA.
+ // then we can completely skip the work of this pass for CUDA/Metal.
//
// Note: We cannot skip the rest of the pass for CPU, because
// it is responsible for introducing the explicit entry-point
// parameter that is used for passing in the global param(s).
//
- if( m_target == CodeGenTarget::CUDASource )
+ if( m_target != CodeGenTarget::CPPSource )
{
- if( !m_globalUniformsParam && (m_globalVars.getCount() == 0) )
+ if (m_globalParams.getCount() == 0 && m_globalVars.getCount() == 0)
{
return;
}
@@ -156,7 +165,7 @@ struct IntroduceExplicitGlobalContextPass
// The context will usually be passed around by pointer,
// so we get and cache that pointer type up front.
//
- m_contextStructPtrType = builder.getPtrType(m_contextStructType);
+ m_contextStructPtrType = builder.getPtrType(kIROp_PtrType, m_contextStructType, (IRIntegerValue)AddressSpace::ThreadLocal);
// The first step will be to create fields in the `KernelContext`
@@ -166,12 +175,13 @@ struct IntroduceExplicitGlobalContextPass
// in a dictionary, so that we can find them later based on
// the global parameter/variable.
//
- if( m_globalUniformsParam )
+ for (auto globalParam : m_globalParams)
{
// For the parameter representing all the global uniform shader
// parameters, we create a field that exactly matches its type.
//
- createContextStructField(m_globalUniformsParam, m_globalUniformsParam->getFullType());
+
+ createContextStructField(globalParam, globalParam->getFullType());
}
for( auto globalVar : m_globalVars )
{
@@ -204,9 +214,9 @@ struct IntroduceExplicitGlobalContextPass
// above, but other functions will have an explicit context parameter
// added on demand.
//
- if( m_globalUniformsParam )
+ for (auto globalParam : m_globalParams)
{
- replaceUsesOfGlobalParam(m_globalUniformsParam);
+ replaceUsesOfGlobalParam(globalParam);
}
for( auto globalVar : m_globalVars )
{
@@ -234,23 +244,11 @@ struct IntroduceExplicitGlobalContextPass
// of the appropraite type.
//
auto key = builder.createStructKey();
- auto field = builder.createStructField(m_contextStructType, key, type);
+ builder.createStructField(m_contextStructType, key, type);
- // If the original instruction had a name hint on it,
- // then we transfer that name hint over to the key,
- // so that the field will have the name of the former
- // global variable/parameter.
- //
- if( auto nameHint = originalInst->findDecoration<IRNameHintDecoration>() )
- {
- nameHint->insertAtStart(key);
- }
-
- // Any other decorations on the original instruction
- // (e.g., pertaining to layout) need to be transferred
- // over to the field (not the key).
- //
- originalInst->transferDecorationsTo(field);
+ // Clone all original decorations to the new struct key.
+ IRCloneEnv cloneEnv;
+ cloneInstDecorationsAndChildren(&cloneEnv, m_module, originalInst, key);
// We end by making note of the key that was created
// for the instruction, so that we can use the key
@@ -280,21 +278,26 @@ struct IntroduceExplicitGlobalContextPass
// then we need to introduce an explicit parameter onto
// each entry-point function to represent it.
//
- IRParam* globalUniformsParam = nullptr;
- if( m_globalUniformsParam )
+ struct GlobalParamInfo
{
- globalUniformsParam = builder.createParam(m_globalUniformsParam->getFullType());
- if( auto nameHint = m_globalUniformsParam->findDecoration<IRNameHintDecoration>() )
- {
- builder.addNameHintDecoration(globalUniformsParam, nameHint->getNameOperand());
- }
+ IRGlobalParam* globalParam;
+ IRParam* entryPointParam;
+ };
+ List<GlobalParamInfo> entryPointParams;
+ for (auto globalParam : m_globalParams)
+ {
+ auto entryPointParam = builder.createParam(globalParam->getFullType());
+ IRCloneEnv cloneEnv;
+ cloneInstDecorationsAndChildren(&cloneEnv, m_module, globalParam, entryPointParam);
+ entryPointParams.add({globalParam, entryPointParam});
// The new parameter will be the last one in the
// parameter list of the entry point.
//
- globalUniformsParam->insertBefore(firstOrdinary);
+ entryPointParam->insertBefore(firstOrdinary);
}
- else if(m_target == CodeGenTarget::CPPSource)
+
+ if (m_target == CodeGenTarget::CPPSource && m_globalParams.getCount() == 0)
{
// The nature of our current ABI for entry points on CPU
// means that we need an explicit parameter to be *declared*
@@ -316,17 +319,17 @@ struct IntroduceExplicitGlobalContextPass
// to inialize the corresponding field of the `KernelContext`
// before moving on with execution of the kernel body.
//
- if(m_globalUniformsParam)
+ for (auto entryPointParam : entryPointParams)
{
- auto fieldKey = m_mapInstToContextFieldKey[m_globalUniformsParam];
- auto fieldType = globalUniformsParam->getFullType();
+ auto fieldKey = m_mapInstToContextFieldKey[entryPointParam.globalParam];
+ auto fieldType = entryPointParam.globalParam->getFullType();
auto fieldPtrType = builder.getPtrType(fieldType);
// We compute the addrress of the field and store the
// value of the parameter into it.
//
auto fieldPtr = builder.emitFieldAddress(fieldPtrType, contextVarPtr, fieldKey);
- builder.emitStore(fieldPtr, globalUniformsParam);
+ builder.emitStore(fieldPtr, entryPointParam.entryPointParam);
}
// Note: at this point the `KernelContext` has additional
diff --git a/source/slang/slang-ir-wrap-global-context.cpp b/source/slang/slang-ir-wrap-global-context.cpp
deleted file mode 100644
index 32bf7995c..000000000
--- a/source/slang/slang-ir-wrap-global-context.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-#include "slang-ir-wrap-global-context.h"
-
-#include "slang-ir-util.h"
-
-namespace Slang
-{
- struct WrapGlobalScopeContext
- {
- List<IRFunc*> entryPoints;
- IRStructType* contextType;
- struct GlobalVarInfo
- {
- IRStructKey* key;
- };
- Dictionary<IRInst*, GlobalVarInfo> mapGlobalVarToInfo;
- struct FuncInfo
- {
- IRInst* contextArg;
- };
- Dictionary<IRFunc*, FuncInfo> mapFuncToInfo;
- IRStringLit* findNameHint(IRInst* inst)
- {
- if (auto nameDecor = inst->findDecoration<IRNameHintDecoration>())
- return nameDecor->getNameOperand();
- if (auto linkageDecor = inst->findDecoration<IRLinkageDecoration>())
- return linkageDecor->getMangledNameOperand();
- return nullptr;
- }
-
- // Move all global parameters to the entry point parameters,
- // and replace them with global variables that are initialized with
- // the entry point parameters.
- void moveGlobalParametersToEntryPoint(IRModule* module)
- {
- Dictionary<IRInst*, IRInst*> mapGlobalParamToGlobalVar;
-
- IRBuilder builder(module);
-
- for (auto globalInst : module->getGlobalInsts())
- {
- if (auto globalParam = as<IRGlobalParam>(globalInst))
- {
- builder.setInsertBefore(globalParam);
- auto globalVar = builder.createGlobalVar(
- globalParam->getFullType(),
- (int)AddressSpace::ThreadLocal);
- if (auto name = findNameHint(globalParam))
- builder.addNameHintDecoration(globalVar, name);
- mapGlobalParamToGlobalVar[globalParam] = globalVar;
- }
- }
-
- // For every entry point, we need to add a new parameter for each global parameter.
- for (auto entryPoint : entryPoints)
- {
- auto firstBlock = entryPoint->getFirstBlock();
- auto paramInsertPoint = firstBlock->getFirstInst();
- struct ParamInfo
- {
- IRInst* newParam;
- IRInst* globalVar;
- };
- List<ParamInfo> newParams;
- for (auto globalParam : mapGlobalParamToGlobalVar)
- {
- auto newParam = builder.createParam(globalParam.first->getFullType());
- newParam->insertBefore(paramInsertPoint);
- globalParam.first->transferDecorationsTo(newParam);
- newParams.add({newParam, globalParam.second});
- }
-
- // Insert assignments to the global variables at the start of the entry point.
- builder.setInsertBefore(firstBlock->getFirstOrdinaryInst());
- for (auto& paramInfo : newParams)
- {
- auto globalVar = paramInfo.globalVar;
- auto newParam = paramInfo.newParam;
- builder.emitStore(globalVar, newParam);
- }
- }
-
- // Replace all uses of global parameters with a load from the global variable.
- for (auto globalParam : mapGlobalParamToGlobalVar)
- {
- auto globalVar = globalParam.second;
- traverseUses(globalParam.first, [&](IRUse* use)
- {
- auto user = use->getUser();
- builder.setInsertBefore(user);
- auto load = builder.emitLoad(globalParam.first->getFullType(), globalVar);
- builder.replaceOperand(use, load);
- });
- globalParam.first->removeAndDeallocate();
- }
- }
-
- void processModule(IRModule* module)
- {
- IRBuilder builder(module);
- List<IRInst*> instsToRemove;
-
- List<IRFunc*> functions;
-
- // Collect all entry points and functions.
- for (auto globalInst : module->getGlobalInsts())
- {
- if (globalInst->findDecoration<IREntryPointDecoration>())
- entryPoints.add(as<IRFunc>(globalInst));
- if (auto func = as<IRFunc>(globalInst))
- functions.add(func);
- }
-
- // Before everything, we need to move all global parameters to the entry point parameters.
- // For each global parameter, e.g. `uniform float4 g;`, we will replace it with a global
- // variable, e.g. `float4 _g;`, and add a new parameter to the each entry point, and copy
- // the value from the entry point parameter to the global variable.
- moveGlobalParametersToEntryPoint(module);
-
- // The next step is to wrap all global variables in a context type, and pass them around
- // with explicit function parameters.
-
- // Collect all global variables.
- for (auto globalInst : module->getGlobalInsts())
- {
- if (auto globalVar = as<IRGlobalVar>(globalInst))
- {
- auto key = builder.createStructKey();
-
- if (auto name = findNameHint(globalVar))
- builder.addNameHintDecoration(key, name);
-
- GlobalVarInfo info;
- info.key = key;
- mapGlobalVarToInfo[globalVar] = info;
- }
- }
- if (mapGlobalVarToInfo.getCount() == 0)
- return;
-
- // Create the context type for the global scope.
- contextType = builder.createStructType();
- builder.addNameHintDecoration(contextType, toSlice("_SlangGlobalContext"));
- for (auto& fieldKV : mapGlobalVarToInfo)
- {
- auto ptrType = as<IRPtrTypeBase>(fieldKV.first->getFullType());
- if (!ptrType)
- continue;
- builder.createStructField(
- contextType, fieldKV.second.key, ptrType->getValueType());
- }
-
- // Identify all functions that requires the global scope context.
-
- // First, add all functions to the work list if it directly uses a global variable.
- List<IRFunc*> funcWorkList;
- HashSet<IRFunc*> funcWorkListSet;
- for (auto& fieldKV : mapGlobalVarToInfo)
- {
- auto globalVar = fieldKV.first;
- for (auto use = globalVar->firstUse; use; use = use->nextUse)
- {
- if (auto userFunc = getParentFunc(use->getUser()))
- {
- if (funcWorkListSet.add(userFunc))
- funcWorkList.add(userFunc);
- }
- }
- }
-
- // Next, propagate the call graph and add all functions that transitively uses a global variable.
- for (Index i = 0; i < funcWorkList.getCount(); i++)
- {
- auto func = funcWorkList[i];
- for (auto use = func->firstUse; use; use = use->nextUse)
- {
- if (auto call = as<IRCall>(use->getUser()))
- {
- if (call->getCallee() != func)
- continue;
- if (auto callerFunc = as<IRFunc>(getParentFunc(call)))
- {
- if (funcWorkListSet.add(callerFunc))
- funcWorkList.add(callerFunc);
- }
- }
- }
- }
-
- // Now, everything in funcWorkListSet is a function that requires the global scope context.
- // We go ahead and add the context type as the first parameter to these functions.
- List<IRInst*> newCallArgs;
-
- auto threadPtrType = builder.getPtrType(kIROp_PtrType, contextType, (int)AddressSpace::ThreadLocal);
- for (auto func : funcWorkListSet)
- {
- auto firstBlock = func->getFirstBlock();
- if (!firstBlock)
- continue;
- bool isEntryPoint = func->findDecoration<IREntryPointDecoration>() != nullptr;
- FuncInfo funcInfo = {};
- if (isEntryPoint)
- {
- // If the function is an entry point, we need to declare a local variable to hold the context.
- setInsertBeforeOrdinaryInst(&builder, firstBlock->getFirstOrdinaryInst());
- funcInfo.contextArg = builder.emitVar(contextType, (int)AddressSpace::ThreadLocal);
- }
- else
- {
- // For other functions, we just add the context as the first parameter.
- builder.setInsertBefore(firstBlock->getFirstInst());
- funcInfo.contextArg = builder.emitParamAtHead(threadPtrType);
- }
- builder.addNameHintDecoration(funcInfo.contextArg, toSlice("_globalCtx"));
-
- mapFuncToInfo[func] = funcInfo;
-
- // Now go through the body of the function and insert the context as the first argument to all calls.
- for (auto block : func->getBlocks())
- {
- for (auto inst : block->getChildren())
- {
- if (auto call = as<IRCall>(inst))
- {
- if (funcWorkListSet.contains((IRFunc*)getResolvedInstForDecorations(call->getCallee())))
- {
- builder.setInsertBefore(call);
- newCallArgs.clear();
- newCallArgs.add(funcInfo.contextArg);
- for (auto arg : call->getArgsList())
- newCallArgs.add(arg);
- auto newCall = builder.emitCallInst(call->getFullType(), call->getCallee(), newCallArgs);
- call->replaceUsesWith(newCall);
- instsToRemove.add(call);
- }
- }
- }
- }
- }
-
- // Next, we need to replace all accesses to global variables with accesses to the context.
- for (auto globalVarKV : mapGlobalVarToInfo)
- {
- auto globalVar = globalVarKV.first;
- auto key = globalVarKV.second.key;
- traverseUses(globalVar, [&](IRUse* use)
- {
- auto user = use->getUser();
- auto parentFunc = getParentFunc(user);
- if (!parentFunc)
- return;
- auto funcInfo = mapFuncToInfo.tryGetValue(parentFunc);
- SLANG_ASSERT(funcInfo);
-
- auto contextArg = funcInfo->contextArg;
- builder.setInsertBefore(user);
- auto replacement = builder.emitFieldAddress(
- builder.getPtrType(
- kIROp_PtrType,
- tryGetPointedToType(&builder, globalVar->getFullType()),
- (int)AddressSpace::ThreadLocal),
- contextArg,
- key);
- builder.replaceOperand(use, replacement);
- });
- SLANG_ASSERT(!globalVar->hasUses());
- instsToRemove.add(globalVar);
- }
-
- // Fix up all function types.
- for (auto func : functions)
- {
- fixUpFuncType(func);
- }
-
- // Finally, cleanup the IR by removing all the insts scheduled for removal.
- for (auto inst : instsToRemove)
- inst->removeAndDeallocate();
- }
- };
-
- void wrapGlobalScopeInContextType(IRModule* module)
- {
- WrapGlobalScopeContext context;
- context.processModule(module);
- }
-}
diff --git a/source/slang/slang-ir-wrap-global-context.h b/source/slang/slang-ir-wrap-global-context.h
deleted file mode 100644
index 1cd411e0a..000000000
--- a/source/slang/slang-ir-wrap-global-context.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#pragma once
-
-#include "slang-ir.h"
-
-namespace Slang
-{
- // The metal backend does not support global variables or parameters.
- // To workaround this restriction, we use this pass to wrap all the
- // global scope variables in a context type, and pass that context
- // type as the first parameter to all functions.
-
- void wrapGlobalScopeInContextType(IRModule* module);
-
-}