summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-02-08 11:19:31 -0500
committer: GitHub <noreply@github.com> 2020-02-08 11:19:31 -0500
commit: 0eed0125fa5e5f425d546efdc2b284b09ffc2785 (patch)
tree: 4ded20c4966f05881a056fad8146e34ac595063e
parent: 7de90c1e0b42b565a5f46e2f9f7580e1f577d414 (diff)
Fixes to make all CPU compute shaders work on CUDA (#1211)
* Launch CUDA test taking into account dispatch size.
* Enable isCPUOnly hack to work on CUDA.
* Rename 'isCPUOnly' hack to 'onlyCPULikeBinding'.
* Add $T special type. Support SampleLevel on CUDA.
* Fix typo.
-rw-r--r-- source/slang/core.meta.slang 19
-rw-r--r-- source/slang/core.meta.slang.h 21
-rw-r--r-- source/slang/slang-emit-c-like.cpp 16
-rw-r--r-- tests/compute/entry-point-uniform-params.slang 2
-rw-r--r-- tools/render-test/cuda/cuda-compute-util.cpp 11
-rw-r--r-- tools/render-test/cuda/cuda-compute-util.h 2
-rw-r--r-- tools/render-test/render-test-main.cpp 2
-rw-r--r-- tools/render-test/shader-input-layout.cpp 13
-rw-r--r-- tools/render-test/shader-input-layout.h 2
-rw-r--r-- tools/render-test/shader-renderer-util.cpp 2
10 files changed, 70 insertions, 20 deletions
diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang
index 85eb82576..14a8a0750 100644
--- a/source/slang/core.meta.slang
+++ b/source/slang/core.meta.slang
@@ -896,7 +896,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0";
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0";
if (kBaseTextureTypes[tt].coordCount == 1)
{
sb << ", $2";
@@ -1049,10 +1049,27 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
// `SampleLevel`
sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n";
+
+ // CUDA
+ if (!isArray)
+ {
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0";
+ for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ {
+ sb << ", $2";
+ if (kBaseTextureTypes[tt].coordCount > 1)
+ {
+ sb << '.' << char(i + 'x');
+ }
+ }
+ sb << ", $3)\")\n";
+ }
+
sb << "T SampleLevel(SamplerState s, ";
sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
sb << "float level);\n";
+
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
sb << "__target_intrinsic(glsl, \"$ctextureLodOffset($p, $2, $3, $4)$z\")\n";
diff --git a/source/slang/core.meta.slang.h b/source/slang/core.meta.slang.h
index b8d7d5d9c..c659d09ff 100644
--- a/source/slang/core.meta.slang.h
+++ b/source/slang/core.meta.slang.h
@@ -917,7 +917,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
- sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$S0>($0";
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "D<$T0>($0";
if (kBaseTextureTypes[tt].coordCount == 1)
{
sb << ", $2";
@@ -1070,10 +1070,27 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
// `SampleLevel`
sb << "__target_intrinsic(glsl, \"$ctextureLod($p, $2, $3)$z\")\n";
+
+ // CUDA
+ if (!isArray)
+ {
+ sb << "__target_intrinsic(cuda, \"tex" << kBaseTextureTypes[tt].coordCount << "DLod<$T0>($0";
+ for (int i = 0; i < kBaseTextureTypes[tt].coordCount; ++i)
+ {
+ sb << ", $2";
+ if (kBaseTextureTypes[tt].coordCount > 1)
+ {
+ sb << '.' << char(i + 'x');
+ }
+ }
+ sb << ", $3)\")\n";
+ }
+
sb << "T SampleLevel(SamplerState s, ";
sb << "float" << kBaseTextureTypes[tt].coordCount + isArray << " location, ";
sb << "float level);\n";
+
if( baseShape != TextureFlavor::Shape::ShapeCube )
{
sb << "__target_intrinsic(glsl, \"$ctextureLodOffset($p, $2, $3, $4)$z\")\n";
@@ -1282,7 +1299,7 @@ for (auto op : binaryOps)
sb << "__intrinsic_op(" << int(op.opCode) << ") matrix<" << resultType << ",N,M> operator" << op.opName << "(" << leftQual << "matrix<" << leftType << ",N,M> left, " << rightType << " right);\n";
}
}
-SLANG_RAW("#line 1264 \"core.meta.slang\"")
+SLANG_RAW("#line 1281 \"core.meta.slang\"")
SLANG_RAW("\n")
SLANG_RAW("\n")
SLANG_RAW("// Specialized function\n")
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index a383caecf..2212cf9cc 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -1367,6 +1367,22 @@ void CLikeSourceEmitter::emitIntrinsicCallExprImpl(
}
break;
+ case 'T':
+ // Get the 'element' type for the type of the param at the index
+ {
+ SLANG_RELEASE_ASSERT(*cursor >= '0' && *cursor <= '9');
+ Index argIndex = (*cursor++) - '0';
+ SLANG_RELEASE_ASSERT(argCount > argIndex);
+
+ IRType* type = args[argIndex].get()->getDataType();
+ if (auto baseTextureType = as<IRTextureType>(type))
+ {
+ type = baseTextureType->getElementType();
+ }
+ emitType(type);
+ }
+ break;
+
case 'S':
// Get the scalar type of a generic at specified index
{
diff --git a/tests/compute/entry-point-uniform-params.slang b/tests/compute/entry-point-uniform-params.slang
index af5a87616..4ca8d6786 100644
--- a/tests/compute/entry-point-uniform-params.slang
+++ b/tests/compute/entry-point-uniform-params.slang
@@ -34,7 +34,7 @@ ConstantBuffer<Signs> signs;
void computeMain(
//TEST_INPUT:cbuffer(data=[2 0 0 0 3 0 0 0]):name=stuff
uniform Stuff stuff,
-//TEST_INPUT:cbuffer(data=[3]):isCPUOnly,name=things
+//TEST_INPUT:cbuffer(data=[3]):onlyCPULikeBinding,name=things
uniform Things things,
//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index a50295063..c6862d2d3 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -347,7 +347,7 @@ public:
return SLANG_SUCCEEDED(context.init(0));
}
-static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, CUDAComputeUtil::Context& outContext)
+static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], CUDAComputeUtil::Context& outContext)
{
auto& bindSet = outContext.m_bindSet;
auto& bindRoot = outContext.m_bindRoot;
@@ -680,11 +680,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
entryPoint->getComputeThreadGroupSize(3, numThreadsPerAxis);
// Launch
- // TODO(JS): We probably want to do something a little more clever here using the maxThreadsPerBlock,
- // but for now just launch a single block, and hope it all fits.
-
auto cudaLaunchResult = cuLaunchKernel(kernel,
- 1, 1, 1, // Blocks
+ dispatchSize[0], dispatchSize[1], dispatchSize[2],
int(numThreadsPerAxis[0]), int(numThreadsPerAxis[1]), int(numThreadsPerAxis[2]), // Threads per block
0, // Shared memory size
cudaStream, // Stream. 0 is no stream.
@@ -727,7 +724,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
return SLANG_OK;
}
-/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext)
+/* static */SlangResult CUDAComputeUtil::execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext)
{
ScopeCUDAContext cudaContext;
SLANG_RETURN_ON_FAIL(cudaContext.init(0));
@@ -742,7 +739,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
ScopeCUDAModule cudaModule;
SLANG_RETURN_ON_FAIL(cudaModule.load(kernel.codeBegin));
- SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, outContext));
+ SLANG_RETURN_ON_FAIL(_compute(cudaContext, cudaModule, outputAndLayout, dispatchSize, outContext));
return SLANG_OK;
}
diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h
index f739ade91..8965f5037 100644
--- a/tools/render-test/cuda/cuda-compute-util.h
+++ b/tools/render-test/cuda/cuda-compute-util.h
@@ -36,7 +36,7 @@ struct CUDAComputeUtil
List<BindSet::Value*> m_buffers;
};
- static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, Context& outContext);
+ static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext);
static bool canCreateDevice();
};
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index c0d105a64..4f5d1e9bb 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -613,7 +613,7 @@ SLANG_TEST_TOOL_API SlangResult innerMain(Slang::StdWriters* stdWriters, SlangSe
const uint64_t startTicks = ProcessUtil::getClockTick();
CUDAComputeUtil::Context context;
- SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, context));
+ SLANG_RETURN_ON_FAIL(CUDAComputeUtil::execute(compilationAndLayout, gOptions.computeDispatchSize, context));
if (gOptions.performanceProfile)
{
diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp
index 40502a9ec..d3c206b58 100644
--- a/tools/render-test/shader-input-layout.cpp
+++ b/tools/render-test/shader-input-layout.cpp
@@ -56,8 +56,9 @@ namespace renderer_test
return -1;
}
- static bool _isCPUTarget(SlangCompileTarget target)
+ static bool _isCPULikeBindingTarget(SlangCompileTarget target)
{
+ // CUDA and C++ are 'CPULike' in terms of their binding mechanism
switch (target)
{
case SLANG_C_SOURCE:
@@ -65,6 +66,8 @@ namespace renderer_test
case SLANG_EXECUTABLE:
case SLANG_SHARED_LIBRARY:
case SLANG_HOST_CALLABLE:
+ case SLANG_CUDA_SOURCE:
+ case SLANG_PTX:
{
return true;
}
@@ -74,13 +77,13 @@ namespace renderer_test
void ShaderInputLayout::updateForTarget(SlangCompileTarget target)
{
- if (!_isCPUTarget(target))
+ if (!_isCPULikeBindingTarget(target))
{
int count = int(entries.getCount());
for (int i = 0; i < count; ++i)
{
auto& entry = entries[i];
- if (entry.isCPUOnly)
+ if (entry.onlyCPULikeBinding)
{
entries.removeAt(i);
i--;
@@ -462,9 +465,9 @@ namespace renderer_test
parser.Read(":");
while (!parser.IsEnd())
{
- if (parser.LookAhead("isCPUOnly"))
+ if (parser.LookAhead("onlyCPULikeBinding"))
{
- entry.isCPUOnly = true;
+ entry.onlyCPULikeBinding = true;
parser.ReadToken();
}
else if (parser.LookAhead("out"))
diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h
index 504b714c5..a9d525d47 100644
--- a/tools/render-test/shader-input-layout.h
+++ b/tools/render-test/shader-input-layout.h
@@ -71,7 +71,7 @@ public:
InputSamplerDesc samplerDesc;
ArrayDesc arrayDesc;
bool isOutput = false;
- bool isCPUOnly = false;
+ bool onlyCPULikeBinding = false; ///< If true, only use on targets that have 'uniform' or 'CPU like' binding, like CPU and CUDA
Slang::String name; ///< Optional name. Useful for binding through reflection.
};
diff --git a/tools/render-test/shader-renderer-util.cpp b/tools/render-test/shader-renderer-util.cpp
index f73595f4f..987b63b48 100644
--- a/tools/render-test/shader-renderer-util.cpp
+++ b/tools/render-test/shader-renderer-util.cpp
@@ -176,7 +176,7 @@ static RefPtr<SamplerState> _createSamplerState(
for (Index i = 0; i < numEntries; i++)
{
const ShaderInputLayoutEntry& srcEntry = srcEntries[i];
- SLANG_ASSERT(srcEntry.isCPUOnly == false);
+ SLANG_ASSERT(srcEntry.onlyCPULikeBinding == false);
DescriptorSetLayout::SlotRangeDesc slotRangeDesc;