diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-18 12:40:14 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-18 12:40:14 -0500 |
| commit | e109985375712b449d365450b3d3e39416a171ce (patch) | |
| tree | 56a2c805368d5afbfa568e514af0704b8ed7346c | |
| parent | 2c097545eaa324a91a035327abad2e8b4fa60469 (diff) | |
CUDA/CPU resource coverage (#1224)
* Add cubemap support.
* Add CUDA fence intrinsics.
* Added Gather for CUDA.
* Use the CUDA driver API as much as possible.
* * Support 1D texture on CPU
* WIP on 1D texture on CUDA
* Added simplified texture test
* Fix test.
* Improve texture-simple tests.
* * Add CPU support for 3d textures
* Add support for mip maps to CUDA
* Disable warnings in nvrtc
* Update CUDA docs
* WIP on 3d texture support.
* Add support for 3d textures for CPU and CUDA.
| -rw-r--r-- | docs/cuda-target.md | 10 | ||||
| -rw-r--r-- | prelude/slang-cpp-types.h | 17 | ||||
| -rw-r--r-- | source/core/slang-nvrtc-compiler.cpp | 8 | ||||
| -rw-r--r-- | tests/compute/texture-simple.slang | 5 | ||||
| -rw-r--r-- | tests/compute/texture-simple.slang.expected.txt | 8 | ||||
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 60 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 196 |
7 files changed, 251 insertions, 53 deletions
diff --git a/docs/cuda-target.md b/docs/cuda-target.md index 9c82b1dc9..79251251b 100644 --- a/docs/cuda-target.md +++ b/docs/cuda-target.md @@ -20,8 +20,7 @@ These limitations apply to Slang transpiling to CUDA. The following are a work in progress or not implmented but are planned to be so in the future -* Barriers/Atomics/Complex resource types -* Preliminary version does maps StructuredBuffers to a pointer - and without boudn checking +* Resource types including surfaces # How it works @@ -137,9 +136,8 @@ For a client application - as long as the requirements of the generated code are That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. */ -virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude( -SlangPassThrough passThrough, -const char* preludeText) = 0; + +void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText); ``` The code that sets up the prelude for the test infrastucture and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path is to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`. @@ -152,5 +150,3 @@ Language aspects Slang follows the HLSL convention that arrays are passed by value. This is in contrast with CUDA where arrays follow C++ conventions and are passed by reference. To make generated CUDA follow this convention an array is turned into a 'FixedArray' struct type. To get something more similar to CUDA/C++ operation the array can be marked in out or inout to make it passed by reference. 
- - diff --git a/prelude/slang-cpp-types.h b/prelude/slang-cpp-types.h index 2238727c5..936233afc 100644 --- a/prelude/slang-cpp-types.h +++ b/prelude/slang-cpp-types.h @@ -262,6 +262,23 @@ struct Texture2D ITexture2D* texture; }; +struct ITexture3D +{ + virtual void Load(const int4& v, void* out) = 0; + virtual void Sample(SamplerState samplerState, const float3& loc, void* out) = 0; + virtual void SampleLevel(SamplerState samplerState, const float3& loc, float level, void* out) = 0; +}; + +template <typename T> +struct Texture3D +{ + T Load(const int4& v) const { T out; texture->Load(v, &out); return out; } + T Sample(SamplerState samplerState, const float3& v) const { T out; texture->Sample(samplerState, v, &out); return out; } + T SampleLevel(SamplerState samplerState, const float3& v, float level) { T out; texture->SampleLevel(samplerState, v, level, &out); return out; } + + ITexture3D* texture; +}; + /* Varying input for Compute */ /* Used when running a single thread */ diff --git a/source/core/slang-nvrtc-compiler.cpp b/source/core/slang-nvrtc-compiler.cpp index 6464592a5..f68c4dc01 100644 --- a/source/core/slang-nvrtc-compiler.cpp +++ b/source/core/slang-nvrtc-compiler.cpp @@ -297,8 +297,16 @@ SlangResult NVRTCDownstreamCompiler::compile(const CompileOptions& options, RefP cmdLine.addArg(include); } + // Neither of these options are strictly required, for general use of nvrtc, + // but are enabled to make use withing Slang work more smoothly { + // Require c++14, as makes initialization construction with {} available and so simplifies code generation cmdLine.addArg("-std=c++14"); + + // Disable all warnings + // This is arguably too much - but nvrtc does not appear to have a mechanism to switch off individual warnings. 
+ // I tried the -Xcudafe mechanism but that does not appear to work for nvrtc + cmdLine.addArg("-w"); } nvrtcProgram program = nullptr; diff --git a/tests/compute/texture-simple.slang b/tests/compute/texture-simple.slang index 040af2784..e79a26885 100644 --- a/tests/compute/texture-simple.slang +++ b/tests/compute/texture-simple.slang @@ -2,7 +2,7 @@ //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 //TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -// TODO(JS): Doesn't work on vk currently +// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute @@ -10,6 +10,8 @@ Texture1D<float> t1D; //TEST_INPUT: Texture2D(size=4, content = one):name t2D Texture2D<float> t2D; +//TEST_INPUT: Texture3D(size=4, content = one):name t3D +Texture3D<float> t3D; //TEST_INPUT: Sampler:name samplerState SamplerState samplerState; @@ -26,6 +28,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) float val = 0.0f; val += t1D.SampleLevel(samplerState, u, 0); val += t2D.SampleLevel(samplerState, float2(u, u), 0); + val += t3D.SampleLevel(samplerState, float3(u, u, u), 0); outputBuffer[idx] = val; } diff --git a/tests/compute/texture-simple.slang.expected.txt b/tests/compute/texture-simple.slang.expected.txt index f5cf6fb10..e54af3bc8 100644 --- a/tests/compute/texture-simple.slang.expected.txt +++ b/tests/compute/texture-simple.slang.expected.txt @@ -1,4 +1,4 @@ -40000000 -40000000 -40000000 -40000000 +40400000 +40400000 +40400000 +40400000 diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index d0907482c..d69521e66 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -16,6 +16,40 @@ namespace renderer_test { using namespace Slang; template 
<int COUNT> +struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Load(const CPPPrelude::int4& v, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTexture3D(float value) : + m_value(value) + { + m_interface = static_cast<CPPPrelude::ITexture3D*>(this); + } + + float m_value; +}; + +template <int COUNT> struct ValueTexture2D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture2D { void set(void* out) @@ -83,6 +117,8 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex float m_value; }; + + static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value) { switch (baseShape) @@ -110,12 +146,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, default: break; } } + case SLANG_TEXTURE_3D: + { + switch (elemCount) + { + case 1: return new ValueTexture3D<1>(value); + case 2: return new ValueTexture3D<2>(value); + case 3: return new ValueTexture3D<3>(value); + case 4: return new ValueTexture3D<4>(value); + default: break; + } + } default: break; } return nullptr; } - /* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext) { auto request = compilationAndLayout.output.request; @@ -172,6 +218,8 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, { case SLANG_TEXTURE_1D: case SLANG_TEXTURE_2D: + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: { SLANG_ASSERT(value->m_userIndex >= 0); 
auto& srcEntry = layout.entries[value->m_userIndex]; @@ -203,11 +251,15 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, } default: break; } + + if (value->m_target == nullptr) + { + SLANG_ASSERT(!"Couldn't construct resource type"); + return SLANG_FAIL; + } + break; } - - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: case SLANG_TEXTURE_BUFFER: { // Need a CPU impl for these... diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index f471c2961..59b5e65f6 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -71,6 +71,10 @@ public: { SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray)); } + if (m_cudaMipMappedArray) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuMipmappedArrayDestroy(m_cudaMipMappedArray)); + } } static CUDATextureResource* getCUDATextureResource(BindSet::Value* value) @@ -88,6 +92,7 @@ public: // This is an opaque type, that's backed by a long long CUtexObject m_cudaTexObj = CUtexObject(); CUarray m_cudaArray = CUarray(); + CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); }; class ScopeCUDAModule @@ -405,6 +410,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto access = type->getResourceAccess(); + CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; + auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; switch (baseShape) @@ -412,6 +419,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_1D: case SLANG_TEXTURE_2D: case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: { SLANG_ASSERT(value->m_userIndex >= 0); auto& srcEntry = entries[value->m_userIndex]; @@ -426,9 +434,11 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp const auto& textureDesc = srcEntry.textureDesc; + // CUDA wants the unused dimensions to be 0. 
+ // Might need to specially handle elsewhere int width = textureDesc.size; - int height = 1; - int depth = 1; + int height = 0; + int depth = 0; switch (baseShape) { @@ -444,35 +454,40 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp depth = textureDesc.size; break; } + case SLANG_TEXTURE_CUBE: + { + height = width; + depth = 6; + break; + } } TextureData texData; generateTextureData(texData, textureDesc); + auto mipLevels = texData.mipLevels; + RefPtr<CUDATextureResource> tex = new CUDATextureResource; size_t elementSize = 0; { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - arrayDesc.Width = width; - - // Width, and Height are the width, and height of the CUDA array (in elements); the CUDA array is one-dimensional if height is 0, two-dimensional otherwise; - arrayDesc.Height = (baseShape == SLANG_TEXTURE_1D) ? 0 : height; + CUarray_format format = CU_AD_FORMAT_FLOAT; + int numChannels = 0; switch (textureDesc.format) { case Format::R_Float32: { - arrayDesc.Format = CU_AD_FORMAT_FLOAT; - arrayDesc.NumChannels = 1; + format = CU_AD_FORMAT_FLOAT; + numChannels = 1; elementSize = sizeof(float); break; } case Format::RGBA_Unorm_UInt8: { - arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - arrayDesc.NumChannels = 4; + format = CU_AD_FORMAT_UNSIGNED_INT8; + numChannels = 4; elementSize = sizeof(uint32_t); break; } @@ -483,35 +498,135 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp } } - // Allocate the array - SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); + if (mipLevels > 1) + { + resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; + + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Depth = depth; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + arrayDesc.Flags = 0; + + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + + 
SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels)); + } + else + { + resourceType = CU_RESOURCE_TYPE_ARRAY; + + if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) + { + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Depth = depth; + arrayDesc.Height = height; + arrayDesc.Width = width; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + arrayDesc.Flags = 0; + + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); + } + else + { + CUDA_ARRAY_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + // Allocate the array, will work for 1D or 2D case + SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); + } + } } - switch (baseShape) + for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: + int mipWidth = width >> mipLevel; + int mipHeight = height >> mipLevel; + int mipDepth = depth >> mipLevel; + + mipWidth = (mipWidth == 0) ? 1 : mipWidth; + mipHeight = (mipHeight == 0) ? 1 : mipHeight; + mipDepth = (mipDepth == 0) ? 1 : mipDepth; + + auto dstArray = tex->m_cudaArray; + if (tex->m_cudaMipMappedArray) { - // TODO(JS): - // Not clear how the copy should be done for 1D, but seeing as it is copying to an 'array' - // doing it with cuMemcpy2D is appropriate. - // Not clear if the height should be 0 or 1. The array required it to be 0. 
- CUDA_MEMCPY2D copyParam; - memset(&copyParam, 0, sizeof(copyParam)); - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = tex->m_cudaArray; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = texData.dataBuffer[0].getBuffer(); - copyParam.srcPitch = width * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = height; - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam)); - break; + // Get the array for the mip level + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel)); } - case SLANG_TEXTURE_3D: + SLANG_ASSERT(dstArray); + + const auto& srcData = texData.dataBuffer[mipLevel]; + + SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); + + // Check using the desc to see if it's plausible { - SLANG_ASSERT(!"Not implemented"); - break; + CUDA_ARRAY_DESCRIPTOR arrayDesc; + SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); + + SLANG_ASSERT(mipWidth == arrayDesc.Width); + SLANG_ASSERT(mipHeight == arrayDesc.Height); + } + + switch (baseShape) + { + case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_2D: + { + CUDA_MEMCPY2D copyParam; + memset(&copyParam, 0, sizeof(copyParam)); + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcData.getBuffer(); + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam)); + break; + } + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: + { + CUDA_MEMCPY3D copyParam; + memset(&copyParam, 0, sizeof(copyParam)); + + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcData.getBuffer(); + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = 
mipHeight; + copyParam.Depth = mipDepth; + + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(&copyParam)); + break; + } + + default: + { + SLANG_ASSERT(!"Not implemented"); + break; + } } } @@ -520,8 +635,16 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp { CUDA_RESOURCE_DESC resDesc; memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = tex->m_cudaArray; + resDesc.resType = resourceType; + + if (tex->m_cudaArray) + { + resDesc.res.array.hArray = tex->m_cudaArray; + } + if (tex->m_cudaMipMappedArray) + { + resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; + } CUDA_TEXTURE_DESC texDesc; memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); @@ -538,7 +661,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp break; } - case SLANG_TEXTURE_CUBE: case SLANG_TEXTURE_BUFFER: { // Need a CUDA impl for these... |
