summaryrefslogtreecommitdiffstats
path: root/tools/render-test/cuda/cuda-compute-util.cpp
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-01-21 09:38:10 -0500
committerGitHub <noreply@github.com>2020-01-21 09:38:10 -0500
commit47392bc72b826b4ad427b703391a77e697735a65 (patch)
tree7c541c4295742b765124f42bab9f713276c83580 /tools/render-test/cuda/cuda-compute-util.cpp
parenta8669ade5cb3add8b9ce08e2c3bd96e93190bca8 (diff)
CUDA support improvements (#1168)
* Add test result for compile-to-cuda * Add RAII for some CUDA types to simplify usage. * First pass handling of some instrinsics on CUDA (for example transcendentals) * CUDA working with built in intrinsics. * Add missing CUDA prelude intrinsics. * CUDA matches CPU output on simple-cross-compile.slang * First pass at hlsl-scalar-float-intrinsic.slang test. * Fix smoothstep impl on CUDA and CPU. * Fixed step intrinsic on CUDA/CPU. * Added operator[] to Matrix for C++, to allow row access. Needs a fix for CUDA. * Fixed warning on clang build.
Diffstat (limited to 'tools/render-test/cuda/cuda-compute-util.cpp')
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp103
1 files changed, 79 insertions, 24 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index 74810e675..e42a0a53e 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -58,6 +58,76 @@ public:
void* m_cudaMemory;
};
+class ScopeCUDAModule
+{
+public:
+
+ operator CUmodule () const { return m_module; }
+
+ ScopeCUDAModule(): m_module(nullptr) {}
+ SlangResult load(const void* image)
+ {
+ release();
+ SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&m_module, image));
+ return SLANG_OK;
+ }
+ void release()
+ {
+ if (m_module)
+ {
+ cuModuleUnload(m_module);
+ m_module = nullptr;
+ }
+ }
+
+ ~ScopeCUDAModule() { release(); }
+
+ CUmodule m_module;
+};
+
+class ScopeCUDAStream
+{
+public:
+
+ SlangResult init(unsigned int flags)
+ {
+ release();
+ SLANG_ASSERT(m_stream == nullptr);
+ SLANG_CUDA_RETURN_ON_FAIL(cudaStreamCreateWithFlags(&m_stream, flags));
+ return SLANG_OK;
+ }
+
+ SlangResult sync()
+ {
+ if (m_stream)
+ {
+ SLANG_CUDA_RETURN_ON_FAIL(cudaStreamSynchronize(m_stream));
+ }
+ else
+ {
+ SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceSynchronize());
+ }
+ return SLANG_OK;
+ }
+
+ void release()
+ {
+ if (m_stream)
+ {
+ sync();
+ SLANG_CUDA_ASSERT_ON_FAIL(cudaStreamDestroy(m_stream));
+ m_stream = nullptr;
+ }
+ }
+
+ ScopeCUDAStream():m_stream(nullptr) {}
+
+ ~ScopeCUDAStream() { release(); }
+
+ operator cudaStream_t () const { return m_stream; }
+
+ cudaStream_t m_stream;
+};
@@ -250,9 +320,9 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
CUfunction kernel;
SLANG_CUDA_RETURN_ON_FAIL(cuModuleGetFunction(&kernel, module, entryPointName));
- // A stream of 0 means no stream
- cudaStream_t stream = 0;
- //SLANG_CUDA_RETURN_ON_FAIL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
+ // A default stream, will act as a global stream. Calling sync will globally sync
+ ScopeCUDAStream cudaStream;
+ //SLANG_CUDA_RETURN_ON_FAIL(cudaStream.init(cudaStreamNonBlocking));
{
// Okay now we need to set up binding
@@ -464,21 +534,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
1, 1, 1, // Blocks
int(numThreadsPerAxis[0]), int(numThreadsPerAxis[1]), int(numThreadsPerAxis[2]), // Threads per block
0, // Shared memory size
- stream, // Stream. 0 is no stream.
+ cudaStream, // Stream. 0 is no stream.
args, // Args
nullptr); // extra
SLANG_CUDA_RETURN_ON_FAIL(cudaLaunchResult);
- if (stream)
- {
- SLANG_CUDA_RETURN_ON_FAIL(cudaStreamSynchronize(stream));
- }
- else
- {
- // Do a sync here. Makes sure any issues are detected early and not on some implicit sync
- SLANG_CUDA_RETURN_ON_FAIL(cudaDeviceSynchronize());
- }
+ // Do a sync here. Makes sure any issues are detected early and not on some implicit sync
+ SLANG_RETURN_ON_FAIL(cudaStream.sync());
}
// Finally we need to copy the data back
@@ -503,11 +566,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
}
}
-
- if (stream)
- {
- SLANG_CUDA_RETURN_ON_FAIL(cudaStreamDestroy(stream));
- }
}
// Release all othe CUDA resource/allocations
@@ -529,12 +587,9 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
const auto& kernel = outputAndLayout.output.kernelDescs[index];
- CUmodule module = 0;
- SLANG_CUDA_RETURN_ON_FAIL(cuModuleLoadData(&module, kernel.codeBegin));
-
- SLANG_RETURN_ON_FAIL(_compute(cudaContext, module, outputAndLayout, outContext));
-
- SLANG_CUDA_RETURN_ON_FAIL(cuModuleUnload(module));
+ ScopeCUDAModule cudaModule;
+ SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin));
+ SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, outContext));
return SLANG_OK;
}