diff options
| author | Nathan V. Morrical <natemorrical@gmail.com> | 2021-08-10 13:25:25 -0600 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-08-10 12:25:25 -0700 |
| commit | 08e36dd6c3c03eb0af7b090d30afee864e309de9 (patch) | |
| tree | 6d580cce8f6912e79ec33b3c1ee08f49f8b20c12 | |
| parent | ebf0e524d61d81a845daaf244b7ceef6c279f95e (diff) | |
Enable reading OptiX SBT records via uniform parameters on ray tracing entry points (#1917)
* OptiX SBT record data can now be accessed using uniform parameters on ray tracing entry points
* Update slang-emit.cpp
| -rw-r--r-- | build/visual-studio/slang/slang.vcxproj | 2 | ||||
| -rw-r--r-- | build/visual-studio/slang/slang.vcxproj.filters | 6 | ||||
| -rwxr-xr-x | source/slang/slang-compiler.h | 3 | ||||
| -rw-r--r-- | source/slang/slang-emit-cuda.cpp | 7 | ||||
| -rw-r--r-- | source/slang/slang-emit.cpp | 7 | ||||
| -rw-r--r-- | source/slang/slang-ir-inst-defs.h | 8 | ||||
| -rw-r--r-- | source/slang/slang-ir-optix-entry-point-uniforms.cpp | 323 | ||||
| -rw-r--r-- | source/slang/slang-ir-optix-entry-point-uniforms.h | 10 |
8 files changed, 365 insertions, 1 deletions
diff --git a/build/visual-studio/slang/slang.vcxproj b/build/visual-studio/slang/slang.vcxproj index 54f406549..f175d6a31 100644 --- a/build/visual-studio/slang/slang.vcxproj +++ b/build/visual-studio/slang/slang.vcxproj @@ -252,6 +252,7 @@ <ClInclude Include="..\..\..\source\slang\slang-ir-lower-generics.h" /> <ClInclude Include="..\..\..\source\slang\slang-ir-lower-tuple-types.h" /> <ClInclude Include="..\..\..\source\slang\slang-ir-missing-return.h" /> + <ClInclude Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.h" /> <ClInclude Include="..\..\..\source\slang\slang-ir-restructure-scoping.h" /> <ClInclude Include="..\..\..\source\slang\slang-ir-restructure.h" /> <ClInclude Include="..\..\..\source\slang\slang-ir-sccp.h" /> @@ -377,6 +378,7 @@ <ClCompile Include="..\..\..\source\slang\slang-ir-lower-generics.cpp" /> <ClCompile Include="..\..\..\source\slang\slang-ir-lower-tuple-types.cpp" /> <ClCompile Include="..\..\..\source\slang\slang-ir-missing-return.cpp" /> + <ClCompile Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.cpp" /> <ClCompile Include="..\..\..\source\slang\slang-ir-restructure-scoping.cpp" /> <ClCompile Include="..\..\..\source\slang\slang-ir-restructure.cpp" /> <ClCompile Include="..\..\..\source\slang\slang-ir-sccp.cpp" /> diff --git a/build/visual-studio/slang/slang.vcxproj.filters b/build/visual-studio/slang/slang.vcxproj.filters index 25e46742f..1697a385c 100644 --- a/build/visual-studio/slang/slang.vcxproj.filters +++ b/build/visual-studio/slang/slang.vcxproj.filters @@ -207,6 +207,9 @@ <ClInclude Include="..\..\..\source\slang\slang-ir-missing-return.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="..\..\..\source\slang\slang-ir-restructure-scoping.h"> <Filter>Header Files</Filter> </ClInclude> @@ -578,6 +581,9 @@ <ClCompile 
Include="..\..\..\source\slang\slang-ir-missing-return.cpp"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\..\source\slang\slang-ir-optix-entry-point-uniforms.cpp"> + <Filter>Source Files</Filter> + </ClCompile> <ClCompile Include="..\..\..\source\slang\slang-ir-restructure-scoping.cpp"> <Filter>Source Files</Filter> </ClCompile> diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 52a9f935d..01f23918b 100755 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -1225,6 +1225,9 @@ namespace Slang /// Are we generating code for a Khronos API (OpenGL or Vulkan)? bool isKhronosTarget(TargetRequest* targetReq); + /// Are we generating code for a CUDA API (CUDA / OptiX)? + bool isCUDATarget(TargetRequest* targetReq); + /// Are resource types "bindless" (implemented as ordinary data) on the given `target`? bool areResourceTypesBindlessOnTarget(TargetRequest* target); diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 0a375cb56..61d51cbda 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -811,6 +811,13 @@ bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu } return true; } + case kIROp_GetOptiXSbtDataPtr: + { + m_writer->emit("(*(("); + emitType(inst->getDataType()); + m_writer->emit(")optixGetSbtDataPointer()))"); + return true; + } default: break; } diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index 4a0018770..5ecebddad 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -21,6 +21,7 @@ #include "slang-ir-lower-generics.h" #include "slang-ir-lower-tuple-types.h" #include "slang-ir-lower-bit-cast.h" +#include "slang-ir-optix-entry-point-uniforms.h" #include "slang-ir-restructure.h" #include "slang-ir-restructure-scoping.h" #include "slang-ir-specialize.h" @@ -249,12 +250,18 @@ Result linkAndOptimizeIR( // the global scope instead. 
// // TODO: We should skip this step for CUDA targets. + // (NM): we actually do need to do this step for OptiX based CUDA targets // { CollectEntryPointUniformParamsOptions passOptions; switch( target ) { case CodeGenTarget::CUDASource: + collectOptiXEntryPointUniformParams(irModule); + #if 0 + dumpIRIfEnabled(compileRequest, irModule, "OPTIX ENTRY POINT UNIFORMS COLLECTED"); + #endif + validateIRModuleIfEnabled(compileRequest, irModule); break; case CodeGenTarget::CPPSource: diff --git a/source/slang/slang-ir-inst-defs.h b/source/slang/slang-ir-inst-defs.h index 847ce1d80..72589912d 100644 --- a/source/slang/slang-ir-inst-defs.h +++ b/source/slang/slang-ir-inst-defs.h @@ -502,7 +502,13 @@ INST(GpuForeach, gpuForeach, 3, 0) INST(GetOptiXRayPayloadPtr, getOptiXRayPayloadPtr, 0, 0) // Wrapper for OptiX intrinsics used to load a single hit attribute -INST(GetOptiXHitAttribute, getOptiXHitAttribute, 1, 0) +// Takes two arguments: the type (either float or int), and the hit +// attribute index +INST(GetOptiXHitAttribute, getOptiXHitAttribute, 2, 0) + +// Wrapper for OptiX intrinsics used to load shader binding table record data +// using a pointer. +INST(GetOptiXSbtDataPtr, getOptiXSbtDataPointer, 0, 0) /* Decoration */ diff --git a/source/slang/slang-ir-optix-entry-point-uniforms.cpp b/source/slang/slang-ir-optix-entry-point-uniforms.cpp new file mode 100644 index 000000000..b26af3e16 --- /dev/null +++ b/source/slang/slang-ir-optix-entry-point-uniforms.cpp @@ -0,0 +1,323 @@ +// slang-ir-optix-entry-point-uniforms.cpp + +// Note: A significant portion of this code is taken and modified from +// slang-ir-entry-point-uniforms.cpp + +#include "slang-ir-optix-entry-point-uniforms.h" + +#include "slang-ir.h" +#include "slang-ir-insts.h" +#include "slang-ir-restructure.h" + +namespace Slang +{ + +struct PerEntryPointPass +{ + // We'll hang on to the module we are processing, + // so that we can refer to it when setting up `IRBuilder`s. 
+ IRModule* module; + + SharedIRBuilder* m_sharedBuilder = nullptr; + + // We will process a whole module by visiting all + // its global functions, looking for entry points. + void processModule() + { + SharedIRBuilder sharedBuilder(module); + m_sharedBuilder = &sharedBuilder; + + // Note that we are only looking at true global-scope + // functions and not functions nested inside of + // IR generics. When using generic entry points, this + // pass should be run after the entry point(s) have + // been specialized to their generic type parameters. + + for( auto inst : module->getGlobalInsts() ) + { + // We are only interested in entry points. + // + // Every entry point must be a function. + // + auto func = as<IRFunc>(inst); + if( !func ) + continue; + + // Entry points will always have the `[entryPoint]` + // decoration to differentiate them from ordinary + // functions. + // + auto entryPointDecor = func->findDecoration<IREntryPointDecoration>(); + if(!entryPointDecor) + continue; + + // Check the IREntryPointDecoration for raytracing entry points + // (as SBT records are only relevant to raytracing) + if (!( + entryPointDecor->getProfile().getStage() == Stage::RayGeneration || + entryPointDecor->getProfile().getStage() == Stage::Intersection || + entryPointDecor->getProfile().getStage() == Stage::AnyHit || + entryPointDecor->getProfile().getStage() == Stage::ClosestHit || + entryPointDecor->getProfile().getStage() == Stage::Miss || + entryPointDecor->getProfile().getStage() == Stage::Callable + )) continue; + + // If we find a candidate entry point, then we + // will process it. 
+ processEntryPoint(func); + } + } + + void processEntryPoint(IRFunc* entryPointFunc) + { + m_entryPointFunc = entryPointFunc; + processEntryPointImpl(entryPointFunc); + } + + IRFunc* m_entryPointFunc = nullptr; + + virtual void processEntryPointImpl(IRFunc* entryPointFunc) = 0; +}; + +struct CollectOptixEntryPointUniformParams : PerEntryPointPass { + + // *If* the entry point has any uniform parameter then we want to create a + // structure type to house them, and then replace the shader parameter + // references with an SBT record access. + + // We only want to create these if actually needed, so we will declare + // them here and then initialize them on-demand. + IRStructType* paramStructType = nullptr; + IRParam* collectedParam = nullptr; + IRVarLayout* entryPointParamsLayout = nullptr; + + void processEntryPointImpl(IRFunc* entryPointFunc) SLANG_OVERRIDE + { + // This pass object may be used across multiple entry points, + // so we need to make sure to reset state that could have been + // left over from a previous entry point. + // + paramStructType = nullptr; + collectedParam = nullptr; + + // We expect all entry points to have explicit layout information attached. + // + // We will assert that we have the information we need, but try to be + // defensive and bail out in the failure case in release builds. + // + auto funcLayoutDecoration = entryPointFunc->findDecoration<IRLayoutDecoration>(); + SLANG_ASSERT(funcLayoutDecoration); + if(!funcLayoutDecoration) + return; + + auto entryPointLayout = as<IREntryPointLayout>(funcLayoutDecoration->getLayout()); + SLANG_ASSERT(entryPointLayout); + if(!entryPointLayout) + return; + + // The parameter layout for an entry point will either be a structure + // type layout, or a constant buffer (a case of parameter group) + // wrapped around such a structure. 
+ // + // If we are in the latter case we will need to make sure to allocate + // an explicit IR constant buffer for that wrapper, + // + // TODO: Reconcile the above with CUDA / OptiX... + entryPointParamsLayout = entryPointLayout->getParamsLayout(); + auto entryPointParamsStructLayout = getScopeStructLayout(entryPointLayout); + + // We will set up an IR builder so that we are ready to generate code. + // + IRBuilder builderStorage(m_sharedBuilder); + auto builder = &builderStorage; + + // We will be removing any uniform parameters we run into, so we + // need to iterate the parameter list carefully to deal with + // us modifying it along the way. + // + IRParam* nextParam = nullptr; + UInt paramCounter = 0; + for( IRParam* param = entryPointFunc->getFirstParam(); param; param = nextParam ) + { + nextParam = param->getNextParam(); + UInt paramIndex = paramCounter++; + + // We expect all entry-point parameters to have layout information, + // but we will be defensive and skip parameters without the required + // information when we are in a release build. + // + auto layoutDecoration = param->findDecoration<IRLayoutDecoration>(); + SLANG_ASSERT(layoutDecoration); + if(!layoutDecoration) + continue; + auto paramLayout = as<IRVarLayout>(layoutDecoration->getLayout()); + SLANG_ASSERT(paramLayout); + if(!paramLayout) + continue; + + // A parameter that has varying input/output behavior should be left alone, + // since this pass is only supposed to apply to uniform (non-varying) + // parameters. + // + // In the case of optix, these varyings come in the form of ray payload + // and hit attributes + // + if(isVaryingParameter(paramLayout)) + continue; + + // At this point we know that `param` is not a varying shader parameter, + // so we'll treat it as part of the SBT record. + // + // If this is the first parameter we are running into, then we need + // to deal with creating the structure type and global shader + // parameter that our transformed entry point will use. 
+ // + ensureCollectedParamAndTypeHaveBeenCreated(); + + // Now that we've ensured the global `struct` type and collected shader parameter + // exist, we need to add a field to the `struct` to represent the + // current parameter. + // + + auto paramType = param->getFullType(); + + builder->setInsertBefore(paramStructType); + // We need to know the "key" that should be used for the parameter, + // so we will read it off of the entry-point layout information. + // + // TODO: Maybe we should associate the key to the parameter via + // a decoration to avoid this indirection? + // + // TODO: Alternatively, we should make this pass responsible for + // dealing with the transfer of layout information from the entry + // point to its parameters, rather than baking that behavior into + // the linker. After all, this pass is traversing the same information + // anyway, so it could do the work while it is here... + // + auto paramFieldKey = cast<IRStructKey>(entryPointParamsStructLayout->getFieldLayoutAttrs()[paramIndex]->getFieldKey()); + + auto paramField = builder->createStructField(paramStructType, paramFieldKey, paramType); + SLANG_UNUSED(paramField); + + // We will transfer all decorations on the parameter over to the key + // so that they can affect downstream emit logic. + // + // TODO: We should double-check whether any of the decorations should + // be moved to the *field* instead. + // + param->transferDecorationsTo(paramFieldKey); + + // At this point we want to eliminate the original entry point + // parameter, in favor of the `struct` field we declared. + // That requires replacing any uses of the parameter with + // appropriate code to pull out the field. + // + // We *could* extract the field at the start of the shader + // and then do a `replaceAllUsesWith` to propragate it + // down, but in practice we expect that it is better for + // performance to "rematerialize" the value of a shader + // parameter as close to where it is used as possible. 
+ // + // We are therefore going to replace the uses one at a time. + // + while(auto use = param->firstUse ) + { + // Given a `use` of the paramter, we will insert + // the replacement code right before the instruction + // that is doing the using. + // + builder->setInsertBefore(use->getUser()); + + // The way to extract the field that corresponds + // to the parameter depends on whether or not + // we generated a constant buffer. + // + IRInst* fieldVal = nullptr; + + // Note: for an optix SBT pointer, we can't dereference + // optixGetSbtDataPointer() like builder->emitFieldAddress requires. + // (thus, this code differs from slang-ir-entry-point-uniforms.cpp) + // Instead, we always use emitFieldExtract, and then the SBT instruction + // emits a C-style cast to the appropriate struct type. + fieldVal = builder->emitFieldExtract( + paramType, + collectedParam, + paramFieldKey); + + // We replace the value used at this use site, which + // will have a side effect of making `use` no longer + // be on the list of uses for `param`, so that when + // we get back to the top of the loop the list of + // uses will be shorter. + // + use->set(fieldVal); + } + + // Once we've replaced all the uses of `param`, we + // can go ahead and remove it completely. + // + param->removeAndDeallocate(); + } + + if( collectedParam ) + { + collectedParam->insertBefore(entryPointFunc->getFirstBlock()->getFirstChild()); + } + else { + // If we didn't find a uniform parameter, we can safely return now. + return; + } + + // Now, replace the collected parameter with OptiX SBT accesses. 
+ auto paramType = collectedParam->getFullType(); + IRInst* getAttr = builder->emitIntrinsicInst(paramType, kIROp_GetOptiXSbtDataPtr, 0, nullptr); + collectedParam->replaceUsesWith(getAttr); + collectedParam->removeAndDeallocate(); + fixUpFuncType(entryPointFunc); + } + + void ensureCollectedParamAndTypeHaveBeenCreated() + { + if (paramStructType) + return; + + IRBuilder builder(m_sharedBuilder); + + // First we create the structure to hold the parameters. + // + builder.setInsertBefore(m_entryPointFunc); + paramStructType = builder.createStructType(); + builder.addNameHintDecoration(paramStructType, UnownedTerminatedStringSlice("ShaderRecordParams")); + + // If we need a constant buffer, then the global + // shader parameter will be a `ConstantBuffer<paramStructType>` + // TODO: reconcile this with OptiX, as the current logic works, but is still focused on VK/DXR.. + // + auto constantBufferType = builder.getConstantBufferType(paramStructType); + collectedParam = builder.createParam(constantBufferType); + + // The global shader parameter should have the layout + // information from the entry point attached to it, so that the + // contained parameters will end up in the right place(s). + // + builder.addLayoutDecoration(collectedParam, entryPointParamsLayout); + + // We add a name hint to the global parameter so that it will + // emit to more readable code when referenced. + // + builder.addNameHintDecoration(collectedParam, UnownedTerminatedStringSlice("shaderRecordParams")); + } +}; + +void collectOptiXEntryPointUniformParams( + IRModule* module) +{ + // look into all entry point functions by checking the IREntryPointDecoration on the children + // Insts of the module. For any ray tracing entry points, collect all uniform parameters into one + // common struct, and replace parameter usage with SBT record accesses. 
+ CollectOptixEntryPointUniformParams context; + context.module = module; + context.processModule(); +} + +} diff --git a/source/slang/slang-ir-optix-entry-point-uniforms.h b/source/slang/slang-ir-optix-entry-point-uniforms.h new file mode 100644 index 000000000..6197e9df5 --- /dev/null +++ b/source/slang/slang-ir-optix-entry-point-uniforms.h @@ -0,0 +1,10 @@ +// slang-ir-optix-entry-point-uniforms.h +#pragma once + +namespace Slang +{ + +struct IRModule; +void collectOptiXEntryPointUniformParams(IRModule* module); + +} |
