summary | refs | log | tree | commit | diff | stats
path: root/tools/gfx/cuda/cuda-command-queue.cpp
diff options
context:
space:
mode:
author: Yong He <yonghe@outlook.com> 2023-04-13 09:49:22 -0700
committer: GitHub <noreply@github.com> 2023-04-13 09:49:22 -0700
commit: 813892cd023e216f6f6560eb47566522d3a82609 (patch)
tree: 07cbf8851e0c178cbc895be73e17e6340cc22685 /tools/gfx/cuda/cuda-command-queue.cpp
parent: 352a460fc866998da5f45a8c117d891c51ab5a47 (diff)
Set sharedMem argument to 0 when launching cuda kernel. (#2799)
Co-authored-by: Yong He <yhe@nvidia.com>
Diffstat (limited to 'tools/gfx/cuda/cuda-command-queue.cpp')
-rw-r--r-- tools/gfx/cuda/cuda-command-queue.cpp | 8
1 file changed, 1 insertion, 7 deletions
diff --git a/tools/gfx/cuda/cuda-command-queue.cpp b/tools/gfx/cuda/cuda-command-queue.cpp
index 0c17a418e..4b0ab7d94 100644
--- a/tools/gfx/cuda/cuda-command-queue.cpp
+++ b/tools/gfx/cuda/cuda-command-queue.cpp
@@ -93,12 +93,6 @@ void CommandQueueImpl::dispatchCompute(int x, int y, int z)
UInt threadGroupSize[3];
programLayout->getKernelThreadGroupSize(kernelId, threadGroupSize);
- int sharedSizeInBytes;
- cuFuncGetAttribute(
- &sharedSizeInBytes,
- CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
- currentPipeline->shaderProgram->cudaKernel);
-
// Copy global parameter data to the `SLANG_globalParams` symbol.
{
CUdeviceptr globalParamsSymbol = 0;
@@ -144,7 +138,7 @@ void CommandQueueImpl::dispatchCompute(int x, int y, int z)
int(threadGroupSize[0]),
int(threadGroupSize[1]),
int(threadGroupSize[2]),
- sharedSizeInBytes,
+ 0,
stream,
nullptr,
extraOptions);