diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-08 11:19:31 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-08 11:19:31 -0500 |
| commit | 0eed0125fa5e5f425d546efdc2b284b09ffc2785 (patch) | |
| tree | 4ded20c4966f05881a056fad8146e34ac595063e /tools/render-test/cuda | |
| parent | 7de90c1e0b42b565a5f46e2f9f7580e1f577d414 (diff) | |
Fixes to make all CPU compute shaders work on CUDA (#1211)
* Launch CUDA test taking into account dispatch size.
* Enable isCPUOnly hack to work on CUDA.
* Rename 'isCPUOnly' hack to 'onlyCPULikeBinding'.
* Add $T special type.
  Support SampleLevel on CUDA.
* Fix typo.
Diffstat (limited to 'tools/render-test/cuda')
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 11 |
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.h | 2 |
2 files changed, 5 insertions, 8 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index a50295063..c6862d2d3 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -347,7 +347,7 @@ public: return SLANG_SUCCEEDED(context.init(0)); } -static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, CUDAComputeUtil::Context& outContext) +static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], CUDAComputeUtil::Context& outContext) { auto& bindSet = outContext.m_bindSet; auto& bindRoot = outContext.m_bindRoot; @@ -680,11 +680,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis); // Launch - // TODO(JS): We probably want to do something a little more clever here using the maxThreadsPerBlock, - // but for now just launch a single block, and hope it all fits. - auto cudaLaunchResult = cuLaunchKernel(kernel, - 1, 1, 1, // Blocks + dispatchSize[0], dispatchSize[1], dispatchSize[2], int(numThreadsPerAxis[0]), int(numThreadsPerAxis[1]), int(numThreadsPerAxis[2]), // Threads per block 0, // Shared memory size cudaStream, // Stream. 0 is no stream. 
@@ -727,7 +724,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp return SLANG_OK; } -/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext) +/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext) { ScopeCUDAContext cudaContext; SLANG_RETURN_ON_FAIL(cudaContext.init(0)); @@ -742,7 +739,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp ScopeCUDAModule cudaModule; SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin)); - SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, outContext)); + SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, dispatchSize, outContext)); return SLANG_OK; } diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index f739ade91..8965f5037 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -36,7 +36,7 @@ struct CUDAComputeUtil List<BindSet::Value*> m_buffers; }; - static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext); + static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); static bool canCreateDevice(); }; |
