Fixes to make all CPU compute shaders work on CUDA (#1211)

* Launch CUDA test taking into account dispatch size.
* Enable isCPUOnly hack to work on CUDA.
* Rename 'isCPUOnly' hack to 'onlyCPULikeBinding'.
* Add $T special type. Support SampleLevel on CUDA.
* Fix typo.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-02-08 11:19:31 -0500
committer: GitHub <noreply@github.com> 2020-02-08 11:19:31 -0500
commit: 0eed0125fa5e5f425d546efdc2b284b09ffc2785 (patch)
tree: 4ded20c4966f05881a056fad8146e34ac595063e /tools/render-test
parent: 7de90c1e0b42b565a5f46e2f9f7580e1f577d414 (diff)
6 files changed, 16 insertions, 16 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index a50295063..c6862d2d3 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -347,7 +347,7 @@ public:
     return SLANG_SUCCEEDED(context.init(0));
 }
 
-static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, CUDAComputeUtil::Context& outContext)
+static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], CUDAComputeUtil::Context& outContext)
 {
     auto& bindSet = outContext.m_bindSet;
     auto& bindRoot = outContext.m_bindRoot;
@@ -680,11 +680,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
             entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
 
             // Launch
-            // TODO(JS): We probably want to do something a little more clever here using the maxThreadsPerBlock,
-            // but for now just launch a single block, and hope it all fits.
-
             auto cudaLaunchResult = cuLaunchKernel(kernel,
-                1, 1, 1,                                                                                // Blocks
+                dispatchSize[0], dispatchSize[1], dispatchSize[2], 
                 int(numThreadsPerAxis[0]), int(numThreadsPerAxis[1]), int(numThreadsPerAxis[2]),        // Threads per block
                 0,                                                                                      // Shared memory size
                 cudaStream,                                                                                 // Stream. 0 is no stream.
@@ -727,7 +724,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
     return SLANG_OK;
 }
 
-/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext)
+/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext)
 {
     ScopeCUDAContext cudaContext;
     SLANG_RETURN_ON_FAIL(cudaContext.init(0));
@@ -742,7 +739,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
 
     ScopeCUDAModule cudaModule;
     SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin));
-    SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, outContext));
+    SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, dispatchSize, outContext));
 
     return SLANG_OK;
 }
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
index f739ade91..8965f5037 100644
--- a/tools/render-test/cuda/cuda-compute-util.h
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -36,7 +36,7 @@ struct CUDAComputeUtil
         List<BindSet::Value*> m_buffers;
     };
 
-    static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext);
+    static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext);
 
     static bool canCreateDevice();
 };
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index c0d105a64..4f5d1e9bb 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -613,7 +613,7 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
         const uint64_t startTicks = ProcessUtil::getClockTick();
 
         CUDAComputeUtil::Context context;
-        SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, context));
+        SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, gOptions.computeDispatchSize, context));
 
         if (gOptions.performanceProfile)
         {
diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp
index 40502a9ec..d3c206b58 100644
--- a/tools/render-test/shader-input-layout.cpp
+++ b/tools/render-test/shader-input-layout.cpp
@@ -56,8 +56,9 @@ namespace renderer_test
         return -1;
     }
 
-    static bool _isCPUTarget(SlangCompileTarget target)
+    static bool _isCPULikeBindingTarget(SlangCompileTarget target)
     {
+        // CUDA and C++ are 'CPULike' in terms of their binding mechanism
         switch (target)
         {
             case SLANG_C_SOURCE:
@@ -65,6 +66,8 @@ namespace renderer_test
             case SLANG_EXECUTABLE:
             case SLANG_SHARED_LIBRARY:
             case SLANG_HOST_CALLABLE:
+            case SLANG_CUDA_SOURCE:
+            case SLANG_PTX:
             {
                 return true;
             }
@@ -74,13 +77,13 @@ namespace renderer_test
 
     void ShaderInputLayout::updateForTarget(SlangCompileTarget target)
     {
-        if (!_isCPUTarget(target))
+        if (!_isCPULikeBindingTarget(target))
         {
             int count = int(entries.getCount());
             for (int i = 0; i < count; ++i)
             {
                 auto& entry = entries[i];
-                if (entry.isCPUOnly)
+                if (entry.onlyCPULikeBinding)
                 {
                     entries.removeAt(i);
                     i--;
@@ -462,9 +465,9 @@ namespace renderer_test
                             parser.Read(":");
                             while (!parser.IsEnd())
                             {
-                                if (parser.LookAhead("isCPUOnly"))
+                                if (parser.LookAhead("onlyCPULikeBinding"))
                                 {
-                                    entry.isCPUOnly = true;
+                                    entry.onlyCPULikeBinding = true;
                                     parser.ReadToken();
                                 }
                                 else if (parser.LookAhead("out"))
diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h
index 504b714c5..a9d525d47 100644
--- a/tools/render-test/shader-input-layout.h
+++ b/tools/render-test/shader-input-layout.h
@@ -71,7 +71,7 @@ public:
     InputSamplerDesc samplerDesc;
     ArrayDesc arrayDesc;
     bool isOutput = false;
-    bool isCPUOnly = false;
+    bool onlyCPULikeBinding = false;        ///< If true, only use on targets that have 'uniform' or 'CPU like' binding, like CPU and CUDA
 
     Slang::String name;                     ///< Optional name. Useful for binding through reflection.
 };
diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp
index f73595f4f..987b63b48 100644
--- a/tools/render-test/shader-renderer-util.cpp
+++ b/tools/render-test/shader-renderer-util.cpp
@@ -176,7 +176,7 @@ static RefPtr<SamplerState> _createSamplerState(
     for (Index i = 0; i < numEntries; i++)
     {
         const ShaderInputLayoutEntry& srcEntry = srcEntries[i];
-        SLANG_ASSERT(srcEntry.isCPUOnly == false);
+        SLANG_ASSERT(srcEntry.onlyCPULikeBinding == false);
 
         DescriptorSetLayout::SlotRangeDesc slotRangeDesc;
author	jsmall-nvidia <jsmall@nvidia.com>	2020-02-08 11:19:31 -0500
committer	GitHub <noreply@github.com>	2020-02-08 11:19:31 -0500
commit	0eed0125fa5e5f425d546efdc2b284b09ffc2785 (patch)
tree	4ded20c4966f05881a056fad8146e34ac595063e /tools/render-test
parent	7de90c1e0b42b565a5f46e2f9f7580e1f577d414 (diff)