diff options
| author | Yong He <yonghe@outlook.com> | 2024-05-06 14:53:27 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-06 14:53:27 -0700 |
| commit | 618428a87b8295347288262ea13eff63cc62aa56 (patch) | |
| tree | a602766742a437c3f0577ae6e3c1077c1b4e082c /source/slang/slang-ir-explicit-global-context.cpp | |
| parent | 2220d26c24b259075182672be22e1494b4affc95 (diff) | |
Delete `wrap-global-context` pass. (#4114)
* Delete `wrap-global-context` pass.
The pass was added for the metal backend without realizing that the existing `explicit-global-context` does 99% of the job. Instead of duplicating the logic in a different pass for metal, we extend same explicit-global-context pass to work for metal.
* Fix build.
Diffstat (limited to 'source/slang/slang-ir-explicit-global-context.cpp')
| -rw-r--r-- | source/slang/slang-ir-explicit-global-context.cpp | 103 |
1 files changed, 53 insertions, 50 deletions
diff --git a/source/slang/slang-ir-explicit-global-context.cpp b/source/slang/slang-ir-explicit-global-context.cpp index ab3a8bb51..f63ceb71e 100644 --- a/source/slang/slang-ir-explicit-global-context.cpp +++ b/source/slang/slang-ir-explicit-global-context.cpp @@ -2,6 +2,7 @@ #include "slang-ir-explicit-global-context.h" #include "slang-ir-insts.h" +#include "slang-ir-clone.h" namespace Slang { @@ -19,7 +20,7 @@ struct IntroduceExplicitGlobalContextPass IRStructType* m_contextStructType = nullptr; IRPtrType* m_contextStructPtrType = nullptr; - IRGlobalParam* m_globalUniformsParam = nullptr; + List<IRGlobalParam*> m_globalParams; List<IRGlobalVar*> m_globalVars; List<IRFunc*> m_entryPoints; @@ -80,17 +81,26 @@ struct IntroduceExplicitGlobalContextPass // One detail we need to be careful about is that as a result - // of legalizing the varying parameters of kernels, we can end - // up with global parameters for varying parameters on CUDA - // (e.g., to represent `threadIdx`. We thus skip any global-scope - // parameters that are varying instead of uniform. + // of legalizing the varying parameters of compute kernels to + // CPU or CUDA, we can end up with global parameters for varying + // parameters on CUDA (e.g., to represent `threadIdx`. We thus + // skip any global-scope parameters that are varying instead of + // uniform. // - auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>(); - SLANG_ASSERT(layoutDecor); - auto layout = as<IRVarLayout>(layoutDecor->getLayout()); - SLANG_ASSERT(layout); - if(isVaryingParameter(layout)) - continue; + switch (m_target) + { + case CodeGenTarget::CUDASource: + case CodeGenTarget::CPPSource: + { + auto layoutDecor = globalParam->findDecoration<IRLayoutDecoration>(); + SLANG_ASSERT(layoutDecor); + auto layout = as<IRVarLayout>(layoutDecor->getLayout()); + SLANG_ASSERT(layout); + if (isVaryingParameter(layout)) + continue; + } + break; + } // Because of upstream passes, we expect there to be only a // single global uniform parameter (at most). @@ -105,8 +115,7 @@ struct IntroduceExplicitGlobalContextPass if(m_target == CodeGenTarget::CUDASource) continue; - SLANG_ASSERT(!m_globalUniformsParam); - m_globalUniformsParam = globalParam; + m_globalParams.add(globalParam); } break; @@ -130,15 +139,15 @@ struct IntroduceExplicitGlobalContextPass } // If there are no global-scope entities that require processing, - // then we can completely skip the work of this pass for CUDA. + // then we can completely skip the work of this pass for CUDA/Metal. // // Note: We cannot skip the rest of the pass for CPU, because // it is responsible for introducing the explicit entry-point // parameter that is used for passing in the global param(s). // - if( m_target == CodeGenTarget::CUDASource ) + if( m_target != CodeGenTarget::CPPSource ) { - if( !m_globalUniformsParam && (m_globalVars.getCount() == 0) ) + if (m_globalParams.getCount() == 0 && m_globalVars.getCount() == 0) { return; } @@ -156,7 +165,7 @@ struct IntroduceExplicitGlobalContextPass // The context will usually be passed around by pointer, // so we get and cache that pointer type up front. // - m_contextStructPtrType = builder.getPtrType(m_contextStructType); + m_contextStructPtrType = builder.getPtrType(kIROp_PtrType, m_contextStructType, (IRIntegerValue)AddressSpace::ThreadLocal); // The first step will be to create fields in the `KernelContext` @@ -166,12 +175,13 @@ struct IntroduceExplicitGlobalContextPass // in a dictionary, so that we can find them later based on // the global parameter/variable. // - if( m_globalUniformsParam ) + for (auto globalParam : m_globalParams) { // For the parameter representing all the global uniform shader // parameters, we create a field that exactly matches its type. // - createContextStructField(m_globalUniformsParam, m_globalUniformsParam->getFullType()); + + createContextStructField(globalParam, globalParam->getFullType()); } for( auto globalVar : m_globalVars ) { @@ -204,9 +214,9 @@ struct IntroduceExplicitGlobalContextPass // above, but other functions will have an explicit context parameter // added on demand. // - if( m_globalUniformsParam ) + for (auto globalParam : m_globalParams) { - replaceUsesOfGlobalParam(m_globalUniformsParam); + replaceUsesOfGlobalParam(globalParam); } for( auto globalVar : m_globalVars ) { @@ -234,23 +244,11 @@ struct IntroduceExplicitGlobalContextPass // of the appropraite type. // auto key = builder.createStructKey(); - auto field = builder.createStructField(m_contextStructType, key, type); + builder.createStructField(m_contextStructType, key, type); - // If the original instruction had a name hint on it, - // then we transfer that name hint over to the key, - // so that the field will have the name of the former - // global variable/parameter. - // - if( auto nameHint = originalInst->findDecoration<IRNameHintDecoration>() ) - { - nameHint->insertAtStart(key); - } - - // Any other decorations on the original instruction - // (e.g., pertaining to layout) need to be transferred - // over to the field (not the key). - // - originalInst->transferDecorationsTo(field); + // Clone all original decorations to the new struct key. + IRCloneEnv cloneEnv; + cloneInstDecorationsAndChildren(&cloneEnv, m_module, originalInst, key); // We end by making note of the key that was created // for the instruction, so that we can use the key @@ -280,21 +278,26 @@ struct IntroduceExplicitGlobalContextPass // then we need to introduce an explicit parameter onto // each entry-point function to represent it. // - IRParam* globalUniformsParam = nullptr; - if( m_globalUniformsParam ) + struct GlobalParamInfo { - globalUniformsParam = builder.createParam(m_globalUniformsParam->getFullType()); - if( auto nameHint = m_globalUniformsParam->findDecoration<IRNameHintDecoration>() ) - { - builder.addNameHintDecoration(globalUniformsParam, nameHint->getNameOperand()); - } + IRGlobalParam* globalParam; + IRParam* entryPointParam; + }; + List<GlobalParamInfo> entryPointParams; + for (auto globalParam : m_globalParams) + { + auto entryPointParam = builder.createParam(globalParam->getFullType()); + IRCloneEnv cloneEnv; + cloneInstDecorationsAndChildren(&cloneEnv, m_module, globalParam, entryPointParam); + entryPointParams.add({globalParam, entryPointParam}); // The new parameter will be the last one in the // parameter list of the entry point. // - globalUniformsParam->insertBefore(firstOrdinary); + entryPointParam->insertBefore(firstOrdinary); } - else if(m_target == CodeGenTarget::CPPSource) + + if (m_target == CodeGenTarget::CPPSource && m_globalParams.getCount() == 0) { // The nature of our current ABI for entry points on CPU // means that we need an explicit parameter to be *declared* @@ -316,17 +319,17 @@ struct IntroduceExplicitGlobalContextPass // to inialize the corresponding field of the `KernelContext` // before moving on with execution of the kernel body. // - if(m_globalUniformsParam) + for (auto entryPointParam : entryPointParams) { - auto fieldKey = m_mapInstToContextFieldKey[m_globalUniformsParam]; - auto fieldType = globalUniformsParam->getFullType(); + auto fieldKey = m_mapInstToContextFieldKey[entryPointParam.globalParam]; + auto fieldType = entryPointParam.globalParam->getFullType(); auto fieldPtrType = builder.getPtrType(fieldType); // We compute the addrress of the field and store the // value of the parameter into it. // auto fieldPtr = builder.emitFieldAddress(fieldPtrType, contextVarPtr, fieldKey); - builder.emitStore(fieldPtr, globalUniformsParam); + builder.emitStore(fieldPtr, entryPointParam.entryPointParam); } // Note: at this point the `KernelContext` has additional |
