diff options
| author | Yong He <yonghe@outlook.com> | 2024-05-06 14:53:27 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-06 14:53:27 -0700 |
| commit | 618428a87b8295347288262ea13eff63cc62aa56 (patch) | |
| tree | a602766742a437c3f0577ae6e3c1077c1b4e082c | |
| parent | 2220d26c24b259075182672be22e1494b4affc95 (diff) | |
Delete `wrap-global-context` pass. (#4114)
* Delete `wrap-global-context` pass.
The pass was added for the Metal backend without realizing that the existing `explicit-global-context` pass already does 99% of the job. Instead of duplicating that logic in a separate pass for Metal, we extend the same `explicit-global-context` pass to work for Metal.
* Fix build.
| -rw-r--r-- | build/visual-studio/slang/slang.vcxproj | 2 | ||||
| -rw-r--r-- | build/visual-studio/slang/slang.vcxproj.filters | 6 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 9 | ||||
| -rw-r--r-- | source/slang/slang-ir-explicit-global-context.cpp | 103 | ||||
| -rw-r--r-- | source/slang/slang-ir-wrap-global-context.cpp | 286 | ||||
| -rw-r--r-- | source/slang/slang-ir-wrap-global-context.h | 14 |
6 files changed, 54 insertions, 366 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj index f5e1c4c5d..ee281252d 100644 --- a/build/visual-studio/slang/slang.vcxproj +++ b/build/visual-studio/slang/slang.vcxproj @@ -487,7 +487,6 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClInclude Include="..\..\..\source\slang\slang-ir-variable-scope-correction.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-vk-invert-y.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.h" />
- <ClInclude Include="..\..\..\source\slang\slang-ir-wrap-global-context.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.h" />
<ClInclude Include="..\..\..\source\slang\slang-ir.h" />
<ClInclude Include="..\..\..\source\slang\slang-language-server-ast-lookup.h" />
@@ -720,7 +719,6 @@ IF EXIST ..\..\..\external\slang-glslang\bin\windows-aarch64\release\slang-glsla <ClCompile Include="..\..\..\source\slang\slang-ir-variable-scope-correction.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-vk-invert-y.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.cpp" />
- <ClCompile Include="..\..\..\source\slang\slang-ir-wrap-global-context.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-ir.cpp" />
<ClCompile Include="..\..\..\source\slang\slang-language-server-ast-lookup.cpp" />
diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters index 8e33fcfbb..f87a64ab1 100644 --- a/build/visual-studio/slang/slang.vcxproj.filters +++ b/build/visual-studio/slang/slang.vcxproj.filters @@ -549,9 +549,6 @@ <ClInclude Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.h">
<Filter>Header Files</Filter>
</ClInclude>
- <ClInclude Include="..\..\..\source\slang\slang-ir-wrap-global-context.h">
- <Filter>Header Files</Filter>
- </ClInclude>
<ClInclude Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -1244,9 +1241,6 @@ <ClCompile Include="..\..\..\source\slang\slang-ir-witness-table-wrapper.cpp">
<Filter>Source Files</Filter>
</ClCompile>
- <ClCompile Include="..\..\..\source\slang\slang-ir-wrap-global-context.cpp">
- <Filter>Source Files</Filter>
- </ClCompile>
<ClCompile Include="..\..\..\source\slang\slang-ir-wrap-structured-buffers.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index afdd37fce..39ebaa64d 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -70,7 +70,6 @@ #include "slang-ir-synthesize-active-mask.h" #include "slang-ir-validate.h" #include "slang-ir-wrap-structured-buffers.h" -#include "slang-ir-wrap-global-context.h" #include "slang-ir-liveness.h" #include "slang-ir-glsl-liveness.h" #include "slang-ir-translate-glsl-global-var.h" @@ -886,9 +885,9 @@ Result linkAndOptimizeIR( case CodeGenTarget::GLSL: case CodeGenTarget::SPIRV: case CodeGenTarget::SPIRVAssembly: - case CodeGenTarget::Metal: moveGlobalVarInitializationToEntryPoints(irModule); break; + case CodeGenTarget::Metal: case CodeGenTarget::CPPSource: case CodeGenTarget::CUDASource: moveGlobalVarInitializationToEntryPoints(irModule); @@ -1097,12 +1096,6 @@ Result linkAndOptimizeIR( validateIRModuleIfEnabled(codeGenContext, irModule); } - // Metal does not allow global variables and global parameters, so - // we need to convert them into an explicit global context parameter - // passed around through a function parameter. 
- if (target == CodeGenTarget::Metal) - wrapGlobalScopeInContextType(irModule); - auto metadata = new ArtifactPostEmitMetadata; outLinkedIR.metadata = metadata; diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp index ab3a8bb51..f63ceb71e 100644 --- a/source/slang/slang-ir-explicit-global-context.cpp +++ b/source/slang/slang-ir-explicit-global-context.cpp @@ -2,6 +2,7 @@ #include "slang-ir-explicit-global-context.h" #include "slang-ir-insts.h" +#include "slang-ir-clone.h" namespace Slang { @@ -19,7 +20,7 @@ struct IntroduceExplicitGlobalContextPass IRStructType* m_contextStructType = nullptr; IRPtrType* m_contextStructPtrType = nullptr; - IRGlobalParam* m_globalUniformsParam = nullptr; + List<IRGlobalParam*> m_globalParams; List<IRGlobalVar*> m_globalVars; List<IRFunc*> m_entryPoints; @@ -80,17 +81,26 @@ struct IntroduceExplicitGlobalContextPass // One detail we need to be careful about is that as a result - // of legalizing the varying parameters of kernels, we can end - // up with global parameters for varying parameters on CUDA - // (e.g., to represent `threadIdx`. We thus skip any global-scope - // parameters that are varying instead of uniform. + // of legalizing the varying parameters of compute kernels to + // CPU or CUDA, we can end up with global parameters for varying + // parameters on CUDA (e.g., to represent `threadIdx`. We thus + // skip any global-scope parameters that are varying instead of + // uniform. 
// - auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>(); - SLANG_ASSERT(layoutDecor); - auto layout = as<IRVarLayout>(layoutDecor->getLayout()); - SLANG_ASSERT(layout); - if(isVaryingParameter(layout)) - continue; + switch (m_target) + { + case CodeGenTarget::CUDASource: + case CodeGenTarget::CPPSource: + { + auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>(); + SLANG_ASSERT(layoutDecor); + auto layout = as<IRVarLayout>(layoutDecor->getLayout()); + SLANG_ASSERT(layout); + if (isVaryingParameter(layout)) + continue; + } + break; + } // Because of upstream passes, we expect there to be only a // single global uniform parameter (at most). @@ -105,8 +115,7 @@ struct IntroduceExplicitGlobalContextPass if(m_target == CodeGenTarget::CUDASource) continue; - SLANG_ASSERT(!m_globalUniformsParam); - m_globalUniformsParam = globalParam; + m_globalParams.add(globalParam); } break; @@ -130,15 +139,15 @@ struct IntroduceExplicitGlobalContextPass } // If there are no global-scope entities that require processing, - // then we can completely skip the work of this pass for CUDA. + // then we can completely skip the work of this pass for CUDA/Metal. // // Note: We cannot skip the rest of the pass for CPU, because // it is responsible for introducing the explicit entry-point // parameter that is used for passing in the global param(s). // - if( m_target == CodeGenTarget::CUDASource ) + if( m_target != CodeGenTarget::CPPSource ) { - if( !m_globalUniformsParam && (m_globalVars.getCount() == 0) ) + if (m_globalParams.getCount() == 0 && m_globalVars.getCount() == 0) { return; } @@ -156,7 +165,7 @@ struct IntroduceExplicitGlobalContextPass // The context will usually be passed around by pointer, // so we get and cache that pointer type up front. 
// - m_contextStructPtrType = builder.getPtrType(m_contextStructType); + m_contextStructPtrType = builder.getPtrType(kIROp_PtrType, m_contextStructType, (IRIntegerValue)AddressSpace::ThreadLocal); // The first step will be to create fields in the `KernelContext` @@ -166,12 +175,13 @@ struct IntroduceExplicitGlobalContextPass // in a dictionary, so that we can find them later based on // the global parameter/variable. // - if( m_globalUniformsParam ) + for (auto globalParam : m_globalParams) { // For the parameter representing all the global uniform shader // parameters, we create a field that exactly matches its type. // - createContextStructField(m_globalUniformsParam, m_globalUniformsParam->getFullType()); + + createContextStructField(globalParam, globalParam->getFullType()); } for( auto globalVar : m_globalVars ) { @@ -204,9 +214,9 @@ struct IntroduceExplicitGlobalContextPass // above, but other functions will have an explicit context parameter // added on demand. // - if( m_globalUniformsParam ) + for (auto globalParam : m_globalParams) { - replaceUsesOfGlobalParam(m_globalUniformsParam); + replaceUsesOfGlobalParam(globalParam); } for( auto globalVar : m_globalVars ) { @@ -234,23 +244,11 @@ struct IntroduceExplicitGlobalContextPass // of the appropraite type. // auto key = builder.createStructKey(); - auto field = builder.createStructField(m_contextStructType, key, type); + builder.createStructField(m_contextStructType, key, type); - // If the original instruction had a name hint on it, - // then we transfer that name hint over to the key, - // so that the field will have the name of the former - // global variable/parameter. - // - if( auto nameHint = originalInst->findDecoration<IRNameHintDecoration>() ) - { - nameHint->insertAtStart(key); - } - - // Any other decorations on the original instruction - // (e.g., pertaining to layout) need to be transferred - // over to the field (not the key). 
- // - originalInst->transferDecorationsTo(field); + // Clone all original decorations to the new struct key. + IRCloneEnv cloneEnv; + cloneInstDecorationsAndChildren(&cloneEnv, m_module, originalInst, key); // We end by making note of the key that was created // for the instruction, so that we can use the key @@ -280,21 +278,26 @@ struct IntroduceExplicitGlobalContextPass // then we need to introduce an explicit parameter onto // each entry-point function to represent it. // - IRParam* globalUniformsParam = nullptr; - if( m_globalUniformsParam ) + struct GlobalParamInfo { - globalUniformsParam = builder.createParam(m_globalUniformsParam->getFullType()); - if( auto nameHint = m_globalUniformsParam->findDecoration<IRNameHintDecoration>() ) - { - builder.addNameHintDecoration(globalUniformsParam, nameHint->getNameOperand()); - } + IRGlobalParam* globalParam; + IRParam* entryPointParam; + }; + List<GlobalParamInfo> entryPointParams; + for (auto globalParam : m_globalParams) + { + auto entryPointParam = builder.createParam(globalParam->getFullType()); + IRCloneEnv cloneEnv; + cloneInstDecorationsAndChildren(&cloneEnv, m_module, globalParam, entryPointParam); + entryPointParams.add({globalParam, entryPointParam}); // The new parameter will be the last one in the // parameter list of the entry point. // - globalUniformsParam->insertBefore(firstOrdinary); + entryPointParam->insertBefore(firstOrdinary); } - else if(m_target == CodeGenTarget::CPPSource) + + if (m_target == CodeGenTarget::CPPSource && m_globalParams.getCount() == 0) { // The nature of our current ABI for entry points on CPU // means that we need an explicit parameter to be *declared* @@ -316,17 +319,17 @@ struct IntroduceExplicitGlobalContextPass // to inialize the corresponding field of the `KernelContext` // before moving on with execution of the kernel body. 
// - if(m_globalUniformsParam) + for (auto entryPointParam : entryPointParams) { - auto fieldKey = m_mapInstToContextFieldKey[m_globalUniformsParam]; - auto fieldType = globalUniformsParam->getFullType(); + auto fieldKey = m_mapInstToContextFieldKey[entryPointParam.globalParam]; + auto fieldType = entryPointParam.globalParam->getFullType(); auto fieldPtrType = builder.getPtrType(fieldType); // We compute the addrress of the field and store the // value of the parameter into it. // auto fieldPtr = builder.emitFieldAddress(fieldPtrType, contextVarPtr, fieldKey); - builder.emitStore(fieldPtr, globalUniformsParam); + builder.emitStore(fieldPtr, entryPointParam.entryPointParam); } // Note: at this point the `KernelContext` has additional diff --git a/source/slang/slang-ir-wrap-global-context.cpp b/source/slang/slang-ir-wrap-global-context.cpp deleted file mode 100644 index 32bf7995c..000000000 --- a/source/slang/slang-ir-wrap-global-context.cpp +++ /dev/null @@ -1,286 +0,0 @@ -#include "slang-ir-wrap-global-context.h" - -#include "slang-ir-util.h" - -namespace Slang -{ - struct WrapGlobalScopeContext - { - List<IRFunc*> entryPoints; - IRStructType* contextType; - struct GlobalVarInfo - { - IRStructKey* key; - }; - Dictionary<IRInst*, GlobalVarInfo> mapGlobalVarToInfo; - struct FuncInfo - { - IRInst* contextArg; - }; - Dictionary<IRFunc*, FuncInfo> mapFuncToInfo; - IRStringLit* findNameHint(IRInst* inst) - { - if (auto nameDecor = inst->findDecoration<IRNameHintDecoration>()) - return nameDecor->getNameOperand(); - if (auto linkageDecor = inst->findDecoration<IRLinkageDecoration>()) - return linkageDecor->getMangledNameOperand(); - return nullptr; - } - - // Move all global parameters to the entry point parameters, - // and replace them with global variables that are initialized with - // the entry point parameters. 
- void moveGlobalParametersToEntryPoint(IRModule* module) - { - Dictionary<IRInst*, IRInst*> mapGlobalParamToGlobalVar; - - IRBuilder builder(module); - - for (auto globalInst : module->getGlobalInsts()) - { - if (auto globalParam = as<IRGlobalParam>(globalInst)) - { - builder.setInsertBefore(globalParam); - auto globalVar = builder.createGlobalVar( - globalParam->getFullType(), - (int)AddressSpace::ThreadLocal); - if (auto name = findNameHint(globalParam)) - builder.addNameHintDecoration(globalVar, name); - mapGlobalParamToGlobalVar[globalParam] = globalVar; - } - } - - // For every entry point, we need to add a new parameter for each global parameter. - for (auto entryPoint : entryPoints) - { - auto firstBlock = entryPoint->getFirstBlock(); - auto paramInsertPoint = firstBlock->getFirstInst(); - struct ParamInfo - { - IRInst* newParam; - IRInst* globalVar; - }; - List<ParamInfo> newParams; - for (auto globalParam : mapGlobalParamToGlobalVar) - { - auto newParam = builder.createParam(globalParam.first->getFullType()); - newParam->insertBefore(paramInsertPoint); - globalParam.first->transferDecorationsTo(newParam); - newParams.add({newParam, globalParam.second}); - } - - // Insert assignments to the global variables at the start of the entry point. - builder.setInsertBefore(firstBlock->getFirstOrdinaryInst()); - for (auto& paramInfo : newParams) - { - auto globalVar = paramInfo.globalVar; - auto newParam = paramInfo.newParam; - builder.emitStore(globalVar, newParam); - } - } - - // Replace all uses of global parameters with a load from the global variable. 
- for (auto globalParam : mapGlobalParamToGlobalVar) - { - auto globalVar = globalParam.second; - traverseUses(globalParam.first, [&](IRUse* use) - { - auto user = use->getUser(); - builder.setInsertBefore(user); - auto load = builder.emitLoad(globalParam.first->getFullType(), globalVar); - builder.replaceOperand(use, load); - }); - globalParam.first->removeAndDeallocate(); - } - } - - void processModule(IRModule* module) - { - IRBuilder builder(module); - List<IRInst*> instsToRemove; - - List<IRFunc*> functions; - - // Collect all entry points and functions. - for (auto globalInst : module->getGlobalInsts()) - { - if (globalInst->findDecoration<IREntryPointDecoration>()) - entryPoints.add(as<IRFunc>(globalInst)); - if (auto func = as<IRFunc>(globalInst)) - functions.add(func); - } - - // Before everything, we need to move all global parameters to the entry point parameters. - // For each global parameter, e.g. `uniform float4 g;`, we will replace it with a global - // variable, e.g. `float4 _g;`, and add a new parameter to the each entry point, and copy - // the value from the entry point parameter to the global variable. - moveGlobalParametersToEntryPoint(module); - - // The next step is to wrap all global variables in a context type, and pass them around - // with explicit function parameters. - - // Collect all global variables. - for (auto globalInst : module->getGlobalInsts()) - { - if (auto globalVar = as<IRGlobalVar>(globalInst)) - { - auto key = builder.createStructKey(); - - if (auto name = findNameHint(globalVar)) - builder.addNameHintDecoration(key, name); - - GlobalVarInfo info; - info.key = key; - mapGlobalVarToInfo[globalVar] = info; - } - } - if (mapGlobalVarToInfo.getCount() == 0) - return; - - // Create the context type for the global scope. 
- contextType = builder.createStructType(); - builder.addNameHintDecoration(contextType, toSlice("_SlangGlobalContext")); - for (auto& fieldKV : mapGlobalVarToInfo) - { - auto ptrType = as<IRPtrTypeBase>(fieldKV.first->getFullType()); - if (!ptrType) - continue; - builder.createStructField( - contextType, fieldKV.second.key, ptrType->getValueType()); - } - - // Identify all functions that requires the global scope context. - - // First, add all functions to the work list if it directly uses a global variable. - List<IRFunc*> funcWorkList; - HashSet<IRFunc*> funcWorkListSet; - for (auto& fieldKV : mapGlobalVarToInfo) - { - auto globalVar = fieldKV.first; - for (auto use = globalVar->firstUse; use; use = use->nextUse) - { - if (auto userFunc = getParentFunc(use->getUser())) - { - if (funcWorkListSet.add(userFunc)) - funcWorkList.add(userFunc); - } - } - } - - // Next, propagate the call graph and add all functions that transitively uses a global variable. - for (Index i = 0; i < funcWorkList.getCount(); i++) - { - auto func = funcWorkList[i]; - for (auto use = func->firstUse; use; use = use->nextUse) - { - if (auto call = as<IRCall>(use->getUser())) - { - if (call->getCallee() != func) - continue; - if (auto callerFunc = as<IRFunc>(getParentFunc(call))) - { - if (funcWorkListSet.add(callerFunc)) - funcWorkList.add(callerFunc); - } - } - } - } - - // Now, everything in funcWorkListSet is a function that requires the global scope context. - // We go ahead and add the context type as the first parameter to these functions. 
- List<IRInst*> newCallArgs; - - auto threadPtrType = builder.getPtrType(kIROp_PtrType, contextType, (int)AddressSpace::ThreadLocal); - for (auto func : funcWorkListSet) - { - auto firstBlock = func->getFirstBlock(); - if (!firstBlock) - continue; - bool isEntryPoint = func->findDecoration<IREntryPointDecoration>() != nullptr; - FuncInfo funcInfo = {}; - if (isEntryPoint) - { - // If the function is an entry point, we need to declare a local variable to hold the context. - setInsertBeforeOrdinaryInst(&builder, firstBlock->getFirstOrdinaryInst()); - funcInfo.contextArg = builder.emitVar(contextType, (int)AddressSpace::ThreadLocal); - } - else - { - // For other functions, we just add the context as the first parameter. - builder.setInsertBefore(firstBlock->getFirstInst()); - funcInfo.contextArg = builder.emitParamAtHead(threadPtrType); - } - builder.addNameHintDecoration(funcInfo.contextArg, toSlice("_globalCtx")); - - mapFuncToInfo[func] = funcInfo; - - // Now go through the body of the function and insert the context as the first argument to all calls. - for (auto block : func->getBlocks()) - { - for (auto inst : block->getChildren()) - { - if (auto call = as<IRCall>(inst)) - { - if (funcWorkListSet.contains((IRFunc*)getResolvedInstForDecorations(call->getCallee()))) - { - builder.setInsertBefore(call); - newCallArgs.clear(); - newCallArgs.add(funcInfo.contextArg); - for (auto arg : call->getArgsList()) - newCallArgs.add(arg); - auto newCall = builder.emitCallInst(call->getFullType(), call->getCallee(), newCallArgs); - call->replaceUsesWith(newCall); - instsToRemove.add(call); - } - } - } - } - } - - // Next, we need to replace all accesses to global variables with accesses to the context. 
- for (auto globalVarKV : mapGlobalVarToInfo) - { - auto globalVar = globalVarKV.first; - auto key = globalVarKV.second.key; - traverseUses(globalVar, [&](IRUse* use) - { - auto user = use->getUser(); - auto parentFunc = getParentFunc(user); - if (!parentFunc) - return; - auto funcInfo = mapFuncToInfo.tryGetValue(parentFunc); - SLANG_ASSERT(funcInfo); - - auto contextArg = funcInfo->contextArg; - builder.setInsertBefore(user); - auto replacement = builder.emitFieldAddress( - builder.getPtrType( - kIROp_PtrType, - tryGetPointedToType(&builder, globalVar->getFullType()), - (int)AddressSpace::ThreadLocal), - contextArg, - key); - builder.replaceOperand(use, replacement); - }); - SLANG_ASSERT(!globalVar->hasUses()); - instsToRemove.add(globalVar); - } - - // Fix up all function types. - for (auto func : functions) - { - fixUpFuncType(func); - } - - // Finally, cleanup the IR by removing all the insts scheduled for removal. - for (auto inst : instsToRemove) - inst->removeAndDeallocate(); - } - }; - - void wrapGlobalScopeInContextType(IRModule* module) - { - WrapGlobalScopeContext context; - context.processModule(module); - } -} diff --git a/source/slang/slang-ir-wrap-global-context.h b/source/slang/slang-ir-wrap-global-context.h deleted file mode 100644 index 1cd411e0a..000000000 --- a/source/slang/slang-ir-wrap-global-context.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "slang-ir.h" - -namespace Slang -{ - // The metal backend does not support global variables or parameters. - // To workaround this restriction, we use this pass to wrap all the - // global scope variables in a context type, and pass that context - // type as the first parameter to all functions. - - void wrapGlobalScopeInContextType(IRModule* module); - -} |
