summaryrefslogtreecommitdiffstats
path: root/tools/render-test
diff options
context:
space:
mode:
authorYong He <yonghe@outlook.com>2020-09-17 15:07:08 -0700
committerGitHub <noreply@github.com>2020-09-17 15:07:08 -0700
commit017acb3b58df51ec8bf3bb7eb9f66e58b6713145 (patch)
tree85ccefac6f17ad6a75962c3bb618419e7638541f /tools/render-test
parentb9cddcb9c718f986ee5e4f7c6189ee2ebea4ace1 (diff)
Front-load cuda module loading to fill in RTTI pointers. (#1548)
Co-authored-by: Tim Foley <tfoleyNV@users.noreply.github.com>
Diffstat (limited to 'tools/render-test')
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp134
1 files changed, 118 insertions, 16 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 5acddf94f..f9e67d3f0 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -956,9 +956,10 @@ static bool _hasWriteAccess(SlangResourceAccess access)
/// Assumes that data for binding the kernel parameters is already
/// set up in `outContext.`
///
-static SlangResult _loadAndInvokeComputeProgram(
+static SlangResult _invokeComputeProgram(
CUcontext cudaContext,
ScopeCUDAStream& cudaStream,
+ ScopeCUDAModule& cudaModule,
const ShaderCompilerUtil::OutputAndLayout& outputAndLayout,
const uint32_t dispatchSize[3],
CUDAComputeUtil::Context& outContext)
@@ -968,17 +969,6 @@ static SlangResult _loadAndInvokeComputeProgram(
auto& bindSet = outContext.m_bindSet;
auto& bindRoot = outContext.m_bindRoot;
- const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute);
- if (index < 0)
- {
- return SLANG_FAIL;
- }
-
- const auto& kernelDesc = outputAndLayout.output.kernelDescs[index];
-
- ScopeCUDAModule cudaModule;
- SLANG_RETURN_ON_FAIL(cudaModule.load(kernelDesc.codeBegin));
-
// The global-scope shader parameters in the input Slang program
// will be collected into a single `__constant__` global variable
// in the output CUDA module.
@@ -1370,10 +1360,87 @@ static SlangResult _loadAndInvokeRayTracingProgram(
}
#endif
+ // Fill in RTTI pointers values in input buffers.
+static SlangResult _populateRTTIEntries(
+ const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout,
+ ScopeCUDAModule& cudaModule)
+{
+ Slang::ComPtr<slang::ISession> linkage;
+ spCompileRequest_getSession(compilationAndLayout.output.request, linkage.writeRef());
+ auto& inputLayout = compilationAndLayout.layout;
+ for (auto& entry : inputLayout.entries)
+ {
+ for (auto& rtti : entry.rttiEntries)
+ {
+ CUdeviceptr ptrValue = 0;
+ switch (rtti.type)
+ {
+ case RTTIDataEntryType::RTTIObject:
+ {
+ auto reflection =
+ slang::ShaderReflection::get(compilationAndLayout.output.request);
+ auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer());
+ ComPtr<ISlangBlob> outName;
+ linkage->getTypeRTTIMangledName(concreteType, outName.writeRef());
+ if (!outName)
+ return SLANG_FAIL;
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetGlobal(
+ &ptrValue,
+ nullptr,
+ cudaModule.m_module,
+ (char*)outName->getBufferPointer()));
+ }
+ break;
+ case RTTIDataEntryType::WitnessTable:
+ {
+ auto reflection =
+ slang::ShaderReflection::get(compilationAndLayout.output.request);
+ auto concreteType = reflection->findTypeByName(rtti.typeName.getBuffer());
+ if (!concreteType)
+ return SLANG_FAIL;
+ auto interfaceType = reflection->findTypeByName(rtti.interfaceName.getBuffer());
+ if (!interfaceType)
+ return SLANG_FAIL;
+ ComPtr<ISlangBlob> outName;
+ linkage->getTypeConformanceWitnessMangledName(
+ concreteType, interfaceType, outName.writeRef());
+ if (!outName)
+ return SLANG_FAIL;
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetGlobal(
+ &ptrValue,
+ nullptr,
+ cudaModule.m_module,
+ (char*)outName->getBufferPointer()));
+ break;
+ }
+ default:
+ break;
+ }
+ if (!ptrValue)
+ return SLANG_FAIL;
+ if (rtti.offset >= 0 &&
+ rtti.offset + sizeof(ptrValue) <=
+ entry.bufferData.getCount() * sizeof(decltype(entry.bufferData[0])))
+ {
+ memcpy(
+ ((char*)entry.bufferData.getBuffer()) + rtti.offset,
+ &ptrValue,
+ sizeof(ptrValue));
+ }
+ else
+ {
+ return SLANG_FAIL;
+ }
+ }
+ }
+ return SLANG_OK;
+}
+
/// Fill in the binding information for arguments of a CUDA program.
static SlangResult _setUpArguments(
CUcontext cudaContext,
ScopeCUDAStream& cudaStream,
+ ScopeCUDAModule& cudaModule,
const ShaderCompilerUtil::OutputAndLayout& outputAndLayout,
const uint32_t dispatchSize[3],
CUDAComputeUtil::Context& outContext)
@@ -1392,6 +1459,14 @@ static SlangResult _setUpArguments(
// Now set up the Values from the test
auto outStream = StdWriters::getOut();
+
+ // Fill in RTTI pointers in input buffers before copying it to GPU memory.
+ // TODO: enable this for Optix path after it is refactored so that context
+ // creation and module loading happens before _setUpArguments.
+ if (outputAndLayout.output.desc.pipelineType == PipelineType::Compute)
+ {
+ SLANG_RETURN_ON_FAIL(_populateRTTIEntries(outputAndLayout, cudaModule));
+ }
SLANG_RETURN_ON_FAIL(ShaderInputLayout::addBindSetValues(outputAndLayout.layout.entries, outputAndLayout.sourcePath, outStream, bindRoot));
ShaderInputLayout::getValueBuffers(outputAndLayout.layout.entries, bindSet, outContext.m_buffers);
@@ -1613,10 +1688,25 @@ static SlangResult _readBackOutputs(
return SLANG_OK;
}
+SlangResult _loadCUDAModule(
+ const ShaderCompilerUtil::OutputAndLayout& outputAndLayout,
+ ScopeCUDAModule& outModule)
+{
+ const Index index = outputAndLayout.output.findKernelDescIndex(StageType::Compute);
+ if (index < 0)
+ {
+ return SLANG_FAIL;
+ }
+ const auto& kernelDesc = outputAndLayout.output.kernelDescs[index];
+ SLANG_RETURN_ON_FAIL(outModule.load(kernelDesc.codeBegin));
+ return SLANG_OK;
+}
+
/// Load and invoke a CUDA program (either compute or ray-tracing)
SlangResult _loadAndInvokeKernel(
CUcontext cudaContext,
ScopeCUDAStream& cudaStream,
+ ScopeCUDAModule& cudaModule,
const ShaderCompilerUtil::OutputAndLayout& outputAndLayout,
const uint32_t dispatchSize[3],
CUDAComputeUtil::Context& outContext)
@@ -1624,11 +1714,13 @@ SlangResult _loadAndInvokeKernel(
switch( outputAndLayout.output.desc.pipelineType )
{
case PipelineType::Compute:
- return _loadAndInvokeComputeProgram(cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext);
+ return _invokeComputeProgram(
+ cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext);
case PipelineType::RayTracing:
#ifdef RENDER_TEST_OPTIX
- return _loadAndInvokeRayTracingProgram(cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext);
+ return _loadAndInvokeRayTracingProgram(
+ cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext);
#endif
break;
@@ -1652,17 +1744,27 @@ SlangResult _loadAndInvokeKernel(
ScopeCUDAStream cudaStream;
//SLANG_CUDA_RETURN_ON_FAIL(cudaStream.init(cudaStreamNonBlocking));
+ ScopeCUDAModule cudaModule;
+
auto& bindSet = outContext.m_bindSet;
auto& bindRoot = outContext.m_bindRoot;
auto request = outputAndLayout.output.request;
auto reflection = (slang::ShaderReflection*) spGetReflection(request);
+ // Load cuda module first so its symbols may be queried and filled into argument buffers.
+ // TODO: refactor optix path to also front-load its context creation and module loading here.
+ // For now just front-load compute kernels.
+ if (outputAndLayout.output.desc.pipelineType == PipelineType::Compute)
+ {
+ SLANG_RETURN_ON_FAIL(_loadCUDAModule(outputAndLayout, cudaModule));
+ }
+
SLANG_RETURN_ON_FAIL(_setUpArguments(
- cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext));
+ cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext));
SLANG_RETURN_ON_FAIL(_loadAndInvokeKernel(
- cudaContext, cudaStream, outputAndLayout, dispatchSize, outContext));
+ cudaContext, cudaStream, cudaModule, outputAndLayout, dispatchSize, outContext));
// Finally we need to copy the data back
SLANG_RETURN_ON_FAIL(_readBackOutputs(