diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-18 14:14:16 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-18 14:14:16 -0500 |
| commit | 8ee39e08c48a315163fe1850dbb12ca292020d4d (patch) | |
| tree | 5041064a194849399aa587ac13b46db2088bdb05 /tools | |
| parent | e109985375712b449d365450b3d3e39416a171ce (diff) | |
First pass Texture Array support on CUDA/CPU (#1225)
* Add cubemap support.
* Add CUDA fence intrinsics.
* Added Gather for CUDA.
* Use the CUDA driver API as much as possible.
* Support 1D texture on CPU
* WIP on 1D texture on CUDA
* Added simplified texture test
* Fix test.
* Improve texture-simple tests.
* Add CPU support for 3d textures
* Add support for mip maps to CUDA
* Disable warnings in nvrtc
* Update CUDA docs
* WIP on 3d texture support.
* Add support for 3d textures for CPU and CUDA.
* CPU and CUDA support for cube maps.
* Add CPU support for Texture1DArray.
* Support CUDA Layered/Array type in meta library.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 107 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 52 |
2 files changed, 143 insertions, 16 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index d69521e66..7be1a6c88 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -16,6 +16,36 @@ namespace renderer_test { using namespace Slang; template <int COUNT> +struct ValueTextureCube : public CPUComputeUtil::Resource, public CPPPrelude::ITextureCube +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTextureCube(float value) : + m_value(value) + { + m_interface = static_cast<CPPPrelude::ITextureCube*>(this); + } + + float m_value; +}; + +template <int COUNT> struct ValueTexture3D : public CPUComputeUtil::Resource, public CPPPrelude::ITexture3D { void set(void* out) @@ -118,10 +148,43 @@ struct ValueTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::ITex }; +template <int COUNT> +struct ValueTexture1DArray : public CPUComputeUtil::Resource, public CPPPrelude::ITexture1DArray +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } -static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, int elemCount, float value) + virtual void Load(const CPPPrelude::int3& v, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float2& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTexture1DArray(float value) : + m_value(value) + 
{ + m_interface = static_cast<CPPPrelude::ITexture1DArray*>(this); + } + + float m_value; +}; + +static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value) { - switch (baseShape) + switch (shape) { case SLANG_TEXTURE_1D: { @@ -157,6 +220,30 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, default: break; } } + case SLANG_TEXTURE_CUBE: + { + switch (elemCount) + { + case 1: return new ValueTextureCube<1>(value); + case 2: return new ValueTextureCube<2>(value); + case 3: return new ValueTextureCube<3>(value); + case 4: return new ValueTextureCube<4>(value); + default: break; + } + } + case SLANG_TEXTURE_1D_ARRAY: + { + switch (elemCount) + { + case 1: return new ValueTexture1DArray<1>(value); + case 2: return new ValueTexture1DArray<2>(value); + case 3: return new ValueTexture1DArray<3>(value); + case 4: return new ValueTexture1DArray<4>(value); + default: break; + } + break; + } + default: break; } return nullptr; @@ -224,10 +311,9 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, SLANG_ASSERT(value->m_userIndex >= 0); auto& srcEntry = layout.entries[value->m_userIndex]; - - // TODO(JS): - // We should use the srcEntry to determine what data to store in the texture, - // it's dimensions etc. For now we just support it being 1. + // TODO(JS): Currently we support only textures who's content is either + // 0 or 1. This is because this is easy to implement. + // Will need to do something better in the future.. 
slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); @@ -241,12 +327,12 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, { case InputTextureContent::One: { - value->m_target = _newValueTexture(baseShape, count, 1.0f); + value->m_target = _newValueTexture(shape, count, 1.0f); break; } case InputTextureContent::Zero: { - value->m_target = _newValueTexture(baseShape, count, 0.0f); + value->m_target = _newValueTexture(shape, count, 0.0f); break; } default: break; @@ -335,13 +421,14 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape baseShape, assert(!"unhandled case"); break; case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_2D: case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: case SLANG_TEXTURE_BUFFER: - case SLANG_TEXTURE_2D: { Resource* targetResource = value ? static_cast<Resource*>(value->m_target.Ptr()) : nullptr; - *location.getUniform<void*>() = targetResource ? targetResource->getInterface() : nullptr; + void* intf = targetResource ? 
targetResource->getInterface() : nullptr; + *location.getUniform<void*>() = intf; break; } case SLANG_STRUCTURED_BUFFER: diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index 59b5e65f6..779cfc96a 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -528,6 +528,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp CUDA_ARRAY3D_DESCRIPTOR arrayDesc; memset(&arrayDesc, 0, sizeof(arrayDesc)); + // If we have a cubemap the depth is 6 arrayDesc.Depth = depth; arrayDesc.Height = height; arrayDesc.Width = width; @@ -536,6 +537,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp arrayDesc.Flags = 0; + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Depth = 6; + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); } else @@ -554,6 +561,9 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp } } + // Work space for holding data for uploading if it needs to be rearranged + List<uint8_t> workspace; + for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) { int mipWidth = width >> mipLevel; @@ -564,6 +574,12 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp mipHeight = (mipHeight == 0) ? 1 : mipHeight; mipDepth = (mipDepth == 0) ? 
1 : mipDepth; + // If it's a cubemap then the depth is always 6 + if (baseShape == SLANG_TEXTURE_CUBE) + { + mipDepth = 6; + } + auto dstArray = tex->m_cudaArray; if (tex->m_cudaMipMappedArray) { @@ -572,9 +588,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp } SLANG_ASSERT(dstArray); - const auto& srcData = texData.dataBuffer[mipLevel]; - - SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); // Check using the desc to see if it's plausible { @@ -582,7 +595,34 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); SLANG_ASSERT(mipWidth == arrayDesc.Width); - SLANG_ASSERT(mipHeight == arrayDesc.Height); + SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0)); + } + + const void* srcDataPtr = nullptr; + + if (baseShape == SLANG_TEXTURE_CUBE) + { + size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; + + workspace.setCount(faceSizeInBytes * 6); + + // Copy the data over to make contiguous + for (Index j = 0; j < 6; j++) + { + const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel]; + SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount()); + + ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); + } + + srcDataPtr = workspace.getBuffer(); + } + else + { + const auto& srcData = texData.dataBuffer[mipLevel]; + SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); + + srcDataPtr = srcData.getBuffer(); } switch (baseShape) @@ -595,7 +635,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; copyParam.dstArray = dstArray; copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcData.getBuffer(); + copyParam.srcHost = srcDataPtr; copyParam.srcPitch = mipWidth * elementSize; copyParam.WidthInBytes = copyParam.srcPitch; 
copyParam.Height = mipHeight; @@ -612,7 +652,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp copyParam.dstArray = dstArray; copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcData.getBuffer(); + copyParam.srcHost = srcDataPtr; copyParam.srcPitch = mipWidth * elementSize; copyParam.WidthInBytes = copyParam.srcPitch; copyParam.Height = mipHeight; |
