summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-02-18 12:40:14 -0500
committerGitHub <noreply@github.com>2020-02-18 12:40:14 -0500
commite109985375712b449d365450b3d3e39416a171ce (patch)
tree56a2c805368d5afbfa568e514af0704b8ed7346c
parent2c097545eaa324a91a035327abad2e8b4fa60469 (diff)
CUDA/CPU resource coverage (#1224)
* Add cubemap support. * Add CUDA fence intrinsics. * Added Gather for CUDA. * Use the CUDA driver API as much as possible. * * Support 1D texture on CPU * WIP on 1D texture on CUDA * Added simplified texture test * Fix test. * Improve texture-simple tests. * * Add CPU support for 3d textures * Add support for mip maps to CUDA * Disable warnings in nvrtc * Update CUDA docs * WIP on 3d texture support. * Add support for 3d textures for CPU and CUDA.
-rw-r--r--docs/cuda-target.md10
-rw-r--r--prelude/slang-cpp-types.h17
-rw-r--r--source/core/slang-nvrtc-compiler.cpp8
-rw-r--r--tests/compute/texture-simple.slang5
-rw-r--r--tests/compute/texture-simple.slang.expected.txt8
-rw-r--r--tools/render-test/cpu-compute-util.cpp60
-rw-r--r--tools/render-test/cuda/cuda-compute-util.cpp196
7 files changed, 251 insertions, 53 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md
index 9c82b1dc9..79251251b 100644
--- a/docs/cuda-target.md
+++ b/docs/cuda-target.md
@@ -20,8 +20,7 @@ These limitations apply to Slang transpiling to CUDA.
The following are a work in progress or not implmented but are planned to be so in the future
-* Barriers/Atomics/Complex resource types
-* Preliminary version does maps StructuredBuffers to a pointer - and without boudn checking
+* Resource types including surfaces
# How it works
@@ -137,9 +136,8 @@ For a client application - as long as the requirements of the generated code are
That for pass-through usage, prelude is not pre-pended, preludes are for code generation only.
*/
-virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude(
-SlangPassThrough passThrough,
-const char* preludeText) = 0;
+
+void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText);
```
The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
@@ -152,5 +150,3 @@ Language aspects
Slang follows the HLSL convention that arrays are passed by value. This is in contrast with CUDA where arrays follow C++ conventions and are passed by reference. To make generated CUDA follow this convention an array is turned into a 'FixedArray' struct type.
To get something more similar to CUDA/C++ operation the array can be marked in out or inout to make it passed by reference.
-
-
diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h
index 2238727c5..936233afc 100644
--- a/prelude/slang-cpp-types.h
+++ b/prelude/slang-cpp-types.h
@@ -262,6 +262,23 @@ struct Texture2D
ITexture2D* texture;
};
+// Type-erased interface for a 3D texture. The typed Texture3D<T> wrapper forwards to these
+// methods, passing the address of a T as `result`; implementations write the value there.
+// NOTE: the declaration order of the virtuals fixes the vtable layout - do not reorder.
+struct ITexture3D
+{
+    // Writes the value for integer location `location` to `result`.
+    // NOTE(review): presumably xyz texel coordinates plus a mip level, as with HLSL Load - confirm.
+    virtual void Load(const int4& location, void* result) = 0;
+
+    // Writes the value sampled at `location` using `sampler` to `result`.
+    virtual void Sample(SamplerState sampler, const float3& location, void* result) = 0;
+
+    // As Sample, but additionally takes an explicit `level`.
+    virtual void SampleLevel(SamplerState sampler, const float3& location, float level, void* result) = 0;
+};
+
+// Typed front end for an ITexture3D. Each method forwards to the interface, which fills a local T
+// through a void pointer, and then returns that T by value.
+// The struct declares no constructor or destructor, so `texture` is neither allocated nor
+// released here; it must be set before any method is called.
+template <typename T>
+struct Texture3D
+{
+    // Returns the value the underlying texture reports for integer location `v`.
+    T Load(const int4& v) const
+    {
+        T out;
+        texture->Load(v, &out);
+        return out;
+    }
+
+    // Returns the value sampled at `v` with `samplerState`.
+    T Sample(SamplerState samplerState, const float3& v) const
+    {
+        T out;
+        texture->Sample(samplerState, v, &out);
+        return out;
+    }
+
+    // Returns the value sampled at `v` with `samplerState` at the explicit `level`.
+    // const-qualified like Load and Sample: it only reads `texture`, so it must also be
+    // callable on a const Texture3D.
+    T SampleLevel(SamplerState samplerState, const float3& v, float level) const
+    {
+        T out;
+        texture->SampleLevel(samplerState, v, level, &out);
+        return out;
+    }
+
+    ITexture3D* texture;
+};
+
/* Varying input for Compute */
/* Used when running a single thread */
diff --git a/source/core/slang-nvrtc-compiler.cpp b/source/core/slang-nvrtc-compiler.cpp
index 6464592a5..f68c4dc01 100644
--- a/source/core/slang-nvrtc-compiler.cpp
+++ b/source/core/slang-nvrtc-compiler.cpp
@@ -297,8 +297,16 @@ SlangResult NVRTCDownstreamCompiler::compile(const CompileOptions& options, RefP
cmdLine.addArg(include);
}
+ // Neither of these options is strictly required for general use of nvrtc,
+ // but both are enabled to make use within Slang work more smoothly
{
+ // Require C++14, as it makes initialization construction with {} available and so simplifies code generation
cmdLine.addArg("-std=c++14");
+
+ // Disable all warnings
+ // This is arguably too much - but nvrtc does not appear to have a mechanism to switch off individual warnings.
+ // I tried the -Xcudafe mechanism but that does not appear to work for nvrtc
+ cmdLine.addArg("-w");
}
nvrtcProgram program = nullptr;
diff --git a/tests/compute/texture-simple.slang b/tests/compute/texture-simple.slang
index 040af2784..e79a26885 100644
--- a/tests/compute/texture-simple.slang
+++ b/tests/compute/texture-simple.slang
@@ -2,7 +2,7 @@
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil
-// TODO(JS): Doesn't work on vk currently
+// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
@@ -10,6 +10,8 @@
Texture1D<float> t1D;
//TEST_INPUT: Texture2D(size=4, content = one):name t2D
Texture2D<float> t2D;
+//TEST_INPUT: Texture3D(size=4, content = one):name t3D
+Texture3D<float> t3D;
//TEST_INPUT: Sampler:name samplerState
SamplerState samplerState;
@@ -26,6 +28,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
float val = 0.0f;
val += t1D.SampleLevel(samplerState, u, 0);
val += t2D.SampleLevel(samplerState, float2(u, u), 0);
+ val += t3D.SampleLevel(samplerState, float3(u, u, u), 0);
outputBuffer[idx] = val;
}
diff --git a/tests/compute/texture-simple.slang.expected.txt b/tests/compute/texture-simple.slang.expected.txt
index f5cf6fb10..e54af3bc8 100644
--- a/tests/compute/texture-simple.slang.expected.txt
+++ b/tests/compute/texture-simple.slang.expected.txt
@@ -1,4 +1,4 @@
-40000000
-40000000
-40000000
-40000000
+40400000
+40400000
+40400000
+40400000
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp
index d0907482c..d69521e66 100644
--- a/tools/render-test/cpu-compute-util.cpp
+++ b/tools/render-test/cpu-compute-util.cpp
@@ -16,6 +16,40 @@ namespace renderer_test {
using namespace Slang;
template <int COUNT>
+// Constant-valued 3D texture for the CPU path: Load, Sample and SampleLevel all ignore their
+// location/sampler/level arguments and write m_value into COUNT consecutive floats at `out`.
+struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D
+{
+    // Stores the fill value and publishes this object's ITexture3D face through m_interface.
+    ValueTexture3D(float value) :
+        m_value(value)
+    {
+        m_interface = static_cast<CPPPrelude::ITexture3D*>(this);
+    }
+
+    // Fills COUNT floats starting at `out` with m_value.
+    void set(void* out)
+    {
+        float* cursor = (float*)out;
+        for (int remaining = COUNT; remaining > 0; --remaining)
+        {
+            *cursor++ = m_value;
+        }
+    }
+
+    virtual void Load(const CPPPrelude::int4& /* v */, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+
+    virtual void Sample(CPPPrelude::SamplerState /* samplerState */, const CPPPrelude::float3& /* loc */, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+
+    virtual void SampleLevel(CPPPrelude::SamplerState /* samplerState */, const CPPPrelude::float3& /* loc */, float /* level */, void* out) SLANG_OVERRIDE
+    {
+        set(out);
+    }
+
+    float m_value;
+};
+
+
+template <int COUNT>
struct ValueTexture2D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture2D
{
void set(void* out)
@@ -83,6 +117,8 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex
float m_value;
};
+
+
static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value)
{
switch (baseShape)
@@ -110,12 +146,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
default: break;
}
}
+ case SLANG_TEXTURE_3D:
+ {
+ switch (elemCount)
+ {
+ case 1: return new ValueTexture3D<1>(value);
+ case 2: return new ValueTexture3D<2>(value);
+ case 3: return new ValueTexture3D<3>(value);
+ case 4: return new ValueTexture3D<4>(value);
+ default: break;
+ }
+ }
default: break;
}
return nullptr;
}
-
/* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext)
{
auto request = compilationAndLayout.output.request;
@@ -172,6 +218,8 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
{
case SLANG_TEXTURE_1D:
case SLANG_TEXTURE_2D:
+ case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_CUBE:
{
SLANG_ASSERT(value->m_userIndex >= 0);
auto& srcEntry = layout.entries[value->m_userIndex];
@@ -203,11 +251,15 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape,
}
default: break;
}
+
+ if (value->m_target == nullptr)
+ {
+ SLANG_ASSERT(!"Couldn't construct resource type");
+ return SLANG_FAIL;
+ }
+
break;
}
-
- case SLANG_TEXTURE_3D:
- case SLANG_TEXTURE_CUBE:
case SLANG_TEXTURE_BUFFER:
{
// Need a CPU impl for these...
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp
index f471c2961..59b5e65f6 100644
--- a/tools/render-test/cuda/cuda-compute-util.cpp
+++ b/tools/render-test/cuda/cuda-compute-util.cpp
@@ -71,6 +71,10 @@ public:
{
SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray));
}
+ if (m_cudaMipMappedArray)
+ {
+ SLANG_CUDA_ASSERT_ON_FAIL(cuMipmappedArrayDestroy(m_cudaMipMappedArray));
+ }
}
static CUDATextureResource* getCUDATextureResource(BindSet::Value* value)
@@ -88,6 +92,7 @@ public:
// This is an opaque type, that's backed by a long long
CUtexObject m_cudaTexObj = CUtexObject();
CUarray m_cudaArray = CUarray();
+ CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray();
};
class ScopeCUDAModule
@@ -405,6 +410,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
auto access = type->getResourceAccess();
+ CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY;
+
auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK;
switch (baseShape)
@@ -412,6 +419,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
case SLANG_TEXTURE_1D:
case SLANG_TEXTURE_2D:
case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_CUBE:
{
SLANG_ASSERT(value->m_userIndex >= 0);
auto& srcEntry = entries[value->m_userIndex];
@@ -426,9 +434,11 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
const auto& textureDesc = srcEntry.textureDesc;
+ // CUDA wants the unused dimensions to be 0.
+ // Might need to specially handle elsewhere
int width = textureDesc.size;
- int height = 1;
- int depth = 1;
+ int height = 0;
+ int depth = 0;
switch (baseShape)
{
@@ -444,35 +454,40 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
depth = textureDesc.size;
break;
}
+ case SLANG_TEXTURE_CUBE:
+ {
+ height = width;
+ depth = 6;
+ break;
+ }
}
TextureData texData;
generateTextureData(texData, textureDesc);
+ auto mipLevels = texData.mipLevels;
+
RefPtr<CUDATextureResource> tex = new CUDATextureResource;
size_t elementSize = 0;
{
- CUDA_ARRAY_DESCRIPTOR arrayDesc;
- arrayDesc.Width = width;
-
- // Width, and Height are the width, and height of the CUDA array (in elements); the CUDA array is one-dimensional if height is 0, two-dimensional otherwise;
- arrayDesc.Height = (baseShape == SLANG_TEXTURE_1D) ? 0 : height;
+ CUarray_format format = CU_AD_FORMAT_FLOAT;
+ int numChannels = 0;
switch (textureDesc.format)
{
case Format::R_Float32:
{
- arrayDesc.Format = CU_AD_FORMAT_FLOAT;
- arrayDesc.NumChannels = 1;
+ format = CU_AD_FORMAT_FLOAT;
+ numChannels = 1;
elementSize = sizeof(float);
break;
}
case Format::RGBA_Unorm_UInt8:
{
- arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
- arrayDesc.NumChannels = 4;
+ format = CU_AD_FORMAT_UNSIGNED_INT8;
+ numChannels = 4;
elementSize = sizeof(uint32_t);
break;
}
@@ -483,35 +498,135 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
}
}
- // Allocate the array
- SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc));
+ if (mipLevels > 1)
+ {
+ resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
+
+ CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
+ memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+ arrayDesc.Width = width;
+ arrayDesc.Height = height;
+ arrayDesc.Depth = depth;
+ arrayDesc.Format = format;
+ arrayDesc.NumChannels = numChannels;
+ arrayDesc.Flags = 0;
+
+ if (baseShape == SLANG_TEXTURE_CUBE)
+ {
+ arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP;
+ }
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels));
+ }
+ else
+ {
+ resourceType = CU_RESOURCE_TYPE_ARRAY;
+
+ if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE)
+ {
+ CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
+ memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+ arrayDesc.Depth = depth;
+ arrayDesc.Height = height;
+ arrayDesc.Width = width;
+ arrayDesc.Format = format;
+ arrayDesc.NumChannels = numChannels;
+
+ arrayDesc.Flags = 0;
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc));
+ }
+ else
+ {
+ CUDA_ARRAY_DESCRIPTOR arrayDesc;
+ memset(&arrayDesc, 0, sizeof(arrayDesc));
+
+ arrayDesc.Width = width;
+ arrayDesc.Height = height;
+ arrayDesc.Format = format;
+ arrayDesc.NumChannels = numChannels;
+
+ // Allocate the array, will work for 1D or 2D case
+ SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc));
+ }
+ }
}
- switch (baseShape)
+ for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel)
{
- case SLANG_TEXTURE_1D:
- case SLANG_TEXTURE_2D:
+ int mipWidth = width >> mipLevel;
+ int mipHeight = height >> mipLevel;
+ int mipDepth = depth >> mipLevel;
+
+ mipWidth = (mipWidth == 0) ? 1 : mipWidth;
+ mipHeight = (mipHeight == 0) ? 1 : mipHeight;
+ mipDepth = (mipDepth == 0) ? 1 : mipDepth;
+
+ auto dstArray = tex->m_cudaArray;
+ if (tex->m_cudaMipMappedArray)
{
- // TODO(JS):
- // Not clear how the copy should be done for 1D, but seeing as it is copying to an 'array'
- // doing it with cuMemcpy2D is appropriate.
- // Not clear if the height should be 0 or 1. The array required it to be 0.
- CUDA_MEMCPY2D copyParam;
- memset(&copyParam, 0, sizeof(copyParam));
- copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- copyParam.dstArray = tex->m_cudaArray;
- copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
- copyParam.srcHost = texData.dataBuffer[0].getBuffer();
- copyParam.srcPitch = width * elementSize;
- copyParam.WidthInBytes = copyParam.srcPitch;
- copyParam.Height = height;
- SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
- break;
+ // Get the array for the mip level
+ SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel));
}
- case SLANG_TEXTURE_3D:
+ SLANG_ASSERT(dstArray);
+
+ const auto& srcData = texData.dataBuffer[mipLevel];
+
+ SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount());
+
+ // Check using the desc to see if it's plausible
{
- SLANG_ASSERT(!"Not implemented");
- break;
+ CUDA_ARRAY_DESCRIPTOR arrayDesc;
+ SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray));
+
+ SLANG_ASSERT(mipWidth == arrayDesc.Width);
+ SLANG_ASSERT(mipHeight == arrayDesc.Height);
+ }
+
+ switch (baseShape)
+ {
+ case SLANG_TEXTURE_1D:
+ case SLANG_TEXTURE_2D:
+ {
+ CUDA_MEMCPY2D copyParam;
+ memset(&copyParam, 0, sizeof(copyParam));
+ copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ copyParam.dstArray = dstArray;
+ copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
+ copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcPitch = mipWidth * elementSize;
+ copyParam.WidthInBytes = copyParam.srcPitch;
+ copyParam.Height = mipHeight;
+ SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
+ break;
+ }
+ case SLANG_TEXTURE_3D:
+ case SLANG_TEXTURE_CUBE:
+ {
+ CUDA_MEMCPY3D copyParam;
+ memset(&copyParam, 0, sizeof(copyParam));
+
+ copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ copyParam.dstArray = dstArray;
+
+ copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
+ copyParam.srcHost = srcData.getBuffer();
+ copyParam.srcPitch = mipWidth * elementSize;
+ copyParam.WidthInBytes = copyParam.srcPitch;
+ copyParam.Height = mipHeight;
+ copyParam.Depth = mipDepth;
+
+ SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(&copyParam));
+ break;
+ }
+
+ default:
+ {
+ SLANG_ASSERT(!"Not implemented");
+ break;
+ }
}
}
@@ -520,8 +635,16 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
{
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC));
- resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
- resDesc.res.array.hArray = tex->m_cudaArray;
+ resDesc.resType = resourceType;
+
+ if (tex->m_cudaArray)
+ {
+ resDesc.res.array.hArray = tex->m_cudaArray;
+ }
+ if (tex->m_cudaMipMappedArray)
+ {
+ resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray;
+ }
CUDA_TEXTURE_DESC texDesc;
memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC));
@@ -538,7 +661,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
break;
}
- case SLANG_TEXTURE_CUBE:
case SLANG_TEXTURE_BUFFER:
{
// Need a CUDA impl for these...