diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-18 12:40:14 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-18 12:40:14 -0500 |
| commit | e109985375712b449d365450b3d3e39416a171ce (patch) | |
| tree | 56a2c805368d5afbfa568e514af0704b8ed7346c /tools | |
| parent | 2c097545eaa324a91a035327abad2e8b4fa60469 (diff) | |
CUDA/CPU resource coverage (#1224)
* Add cubemap support.
* Add CUDA fence intrinsics.
* Added Gather for CUDA.
* Use the CUDA driver API as much as possible.
* * Support 1D texture on CPU
* WIP on 1D texture on CUDA
* Added simplified texture test
* Fix test.
* Improve texture-simple tests.
* * Add CPU support for 3d textures
* Add support for mip maps to CUDA
* Disable warnings in nvrtc
* Update CUDA docs
* WIP on 3d texture support.
* Add support for 3d textures for CPU and CUDA.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 60 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 196 |
2 files changed, 215 insertions, 41 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index d0907482c..d69521e66 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -16,6 +16,40 @@ namespace renderer_test { using namespace Slang; template <int COUNT> +struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Load(const CPPPrelude::int4& v, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTexture3D(float value) : + m_value(value) + { + m_interface = static_cast<CPPPrelude::ITexture3D*>(this); + } + + float m_value; +}; + +template <int COUNT> struct ValueTexture2D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture2D { void set(void* out) @@ -83,6 +117,8 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex float m_value; }; + + static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value) { switch (baseShape) @@ -110,12 +146,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, default: break; } } + case SLANG_TEXTURE_3D: + { + switch (elemCount) + { + case 1: return new ValueTexture3D<1>(value); + case 2: return new ValueTexture3D<2>(value); + case 3: return new ValueTexture3D<3>(value); + case 4: return new ValueTexture3D<4>(value); + default: break; + } + } default: break; } return nullptr; } - /* static */SlangResult CPUComputeUtil::calcBindings(const ShaderCompilerUtil::OutputAndLayout& compilationAndLayout, Context& outContext) 
{ auto request = compilationAndLayout.output.request; @@ -172,6 +218,8 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, { case SLANG_TEXTURE_1D: case SLANG_TEXTURE_2D: + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: { SLANG_ASSERT(value->m_userIndex >= 0); auto& srcEntry = layout.entries[value->m_userIndex]; @@ -203,11 +251,15 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, } default: break; } + + if (value->m_target == nullptr) + { + SLANG_ASSERT(!"Couldn't construct resource type"); + return SLANG_FAIL; + } + break; } - - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: case SLANG_TEXTURE_BUFFER: { // Need a CPU impl for these... diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index f471c2961..59b5e65f6 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -71,6 +71,10 @@ public: { SLANG_CUDA_ASSERT_ON_FAIL(cuArrayDestroy(m_cudaArray)); } + if (m_cudaMipMappedArray) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuMipmappedArrayDestroy(m_cudaMipMappedArray)); + } } static CUDATextureResource* getCUDATextureResource(BindSet::Value* value) @@ -88,6 +92,7 @@ public: // This is an opaque type, that's backed by a long long CUtexObject m_cudaTexObj = CUtexObject(); CUarray m_cudaArray = CUarray(); + CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); }; class ScopeCUDAModule @@ -405,6 +410,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto access = type->getResourceAccess(); + CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; + auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; switch (baseShape) @@ -412,6 +419,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_1D: case SLANG_TEXTURE_2D: case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: { SLANG_ASSERT(value->m_userIndex >= 0); auto& 
srcEntry = entries[value->m_userIndex]; @@ -426,9 +434,11 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp const auto& textureDesc = srcEntry.textureDesc; + // CUDA wants the unused dimensions to be 0. + // Might need to specially handle elsewhere int width = textureDesc.size; - int height = 1; - int depth = 1; + int height = 0; + int depth = 0; switch (baseShape) { @@ -444,35 +454,40 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp depth = textureDesc.size; break; } + case SLANG_TEXTURE_CUBE: + { + height = width; + depth = 6; + break; + } } TextureData texData; generateTextureData(texData, textureDesc); + auto mipLevels = texData.mipLevels; + RefPtr<CUDATextureResource> tex = new CUDATextureResource; size_t elementSize = 0; { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - arrayDesc.Width = width; - - // Width, and Height are the width, and height of the CUDA array (in elements); the CUDA array is one-dimensional if height is 0, two-dimensional otherwise; - arrayDesc.Height = (baseShape == SLANG_TEXTURE_1D) ? 
0 : height; + CUarray_format format = CU_AD_FORMAT_FLOAT; + int numChannels = 0; switch (textureDesc.format) { case Format::R_Float32: { - arrayDesc.Format = CU_AD_FORMAT_FLOAT; - arrayDesc.NumChannels = 1; + format = CU_AD_FORMAT_FLOAT; + numChannels = 1; elementSize = sizeof(float); break; } case Format::RGBA_Unorm_UInt8: { - arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - arrayDesc.NumChannels = 4; + format = CU_AD_FORMAT_UNSIGNED_INT8; + numChannels = 4; elementSize = sizeof(uint32_t); break; } @@ -483,35 +498,135 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp } } - // Allocate the array - SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); + if (mipLevels > 1) + { + resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; + + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Depth = depth; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + arrayDesc.Flags = 0; + + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels)); + } + else + { + resourceType = CU_RESOURCE_TYPE_ARRAY; + + if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) + { + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Depth = depth; + arrayDesc.Height = height; + arrayDesc.Width = width; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + arrayDesc.Flags = 0; + + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); + } + else + { + CUDA_ARRAY_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + // Allocate the array, will work for 1D or 2D case + 
SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); + } + } } - switch (baseShape) + for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: + int mipWidth = width >> mipLevel; + int mipHeight = height >> mipLevel; + int mipDepth = depth >> mipLevel; + + mipWidth = (mipWidth == 0) ? 1 : mipWidth; + mipHeight = (mipHeight == 0) ? 1 : mipHeight; + mipDepth = (mipDepth == 0) ? 1 : mipDepth; + + auto dstArray = tex->m_cudaArray; + if (tex->m_cudaMipMappedArray) { - // TODO(JS): - // Not clear how the copy should be done for 1D, but seeing as it is copying to an 'array' - // doing it with cuMemcpy2D is appropriate. - // Not clear if the height should be 0 or 1. The array required it to be 0. - CUDA_MEMCPY2D copyParam; - memset(&copyParam, 0, sizeof(copyParam)); - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = tex->m_cudaArray; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = texData.dataBuffer[0].getBuffer(); - copyParam.srcPitch = width * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = height; - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam)); - break; + // Get the array for the mip level + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel)); } - case SLANG_TEXTURE_3D: + SLANG_ASSERT(dstArray); + + const auto& srcData = texData.dataBuffer[mipLevel]; + + SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); + + // Check using the desc to see if it's plausible { - SLANG_ASSERT(!"Not implemented"); - break; + CUDA_ARRAY_DESCRIPTOR arrayDesc; + SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); + + SLANG_ASSERT(mipWidth == arrayDesc.Width); + SLANG_ASSERT(mipHeight == arrayDesc.Height); + } + + switch (baseShape) + { + case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_2D: + { + CUDA_MEMCPY2D copyParam; + memset(&copyParam, 0, sizeof(copyParam)); + 
copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcData.getBuffer(); + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam)); + break; + } + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: + { + CUDA_MEMCPY3D copyParam; + memset(&copyParam, 0, sizeof(copyParam)); + + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcData.getBuffer(); + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + copyParam.Depth = mipDepth; + + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(&copyParam)); + break; + } + + default: + { + SLANG_ASSERT(!"Not implemented"); + break; + } + } } @@ -520,8 +635,16 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp { CUDA_RESOURCE_DESC resDesc; memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); - resDesc.resType = CU_RESOURCE_TYPE_ARRAY; - resDesc.res.array.hArray = tex->m_cudaArray; + resDesc.resType = resourceType; + + if (tex->m_cudaArray) + { + resDesc.res.array.hArray = tex->m_cudaArray; + } + if (tex->m_cudaMipMappedArray) + { + resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; + } CUDA_TEXTURE_DESC texDesc; memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); @@ -538,7 +661,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp break; } - case SLANG_TEXTURE_CUBE: case SLANG_TEXTURE_BUFFER: { // Need a CUDA impl for these... |
