diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-20 10:34:58 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-20 10:34:58 -0500 |
| commit | dcc3af7bf4fdfb6926db039506137be0e16ae3bd (patch) | |
| tree | d11c051624952fb3ef7614338a9b617e1bf059f4 /tools | |
| parent | 788556aaaab1b5767e24cf86dc2f71fd285c06f5 (diff) | |
CUDA/CPU support for 1D, 2D, CubeArray (#1232)
* CUDA support for array of resources.
* * Add support for Texture2DArray on CPU
* Expand texture-simple.slang to test Texture2DArray
* Reorganise CUDAComputeUtil to split out createTextureResource.
* Add TextureCubeArray support for CPU/CUDA targets.
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/render-test/cpu-compute-util.cpp | 90 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 691 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.h | 7 |
3 files changed, 495 insertions, 293 deletions
diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 7be1a6c88..608da9461 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -182,6 +182,71 @@ struct ValueTexture1DArray : public CPUComputeUtil::Resource, public CPPPrelude: float m_value; }; +template <int COUNT> +struct ValueTexture2DArray : public CPUComputeUtil::Resource, public CPPPrelude::ITexture2DArray +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Load(const CPPPrelude::int4& v, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float3& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTexture2DArray(float value) : + m_value(value) + { + m_interface = static_cast<CPPPrelude::ITexture2DArray*>(this); + } + + float m_value; +}; + + +template <int COUNT> +struct ValueTextureCubeArray : public CPUComputeUtil::Resource, public CPPPrelude::ITextureCubeArray +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Sample(CPPPrelude::SamplerState samplerState, const CPPPrelude::float4& loc, void* out) SLANG_OVERRIDE + { + set(out); + } + virtual void SampleLevel(CPPPrelude::SamplerState samplerState, const CPPPrelude::float4& loc, float level, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueTextureCubeArray(float value) : + m_value(value) + { + m_interface = static_cast<CPPPrelude::ITextureCubeArray*>(this); + } + + float m_value; +}; + static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value) { switch (shape) @@ -243,6 +308,31 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int } break; } + case SLANG_TEXTURE_2D_ARRAY: + { + switch (elemCount) + { + case 1: return new ValueTexture2DArray<1>(value); + case 2: return new ValueTexture2DArray<2>(value); + case 3: return new ValueTexture2DArray<3>(value); + case 4: return new ValueTexture2DArray<4>(value); + default: break; + } + break; + } + case SLANG_TEXTURE_CUBE_ARRAY: + { + switch (elemCount) + { + case 1: return new ValueTextureCubeArray<1>(value); + case 2: return new ValueTextureCubeArray<2>(value); + case 3: return new ValueTextureCubeArray<3>(value); + case 4: return new ValueTextureCubeArray<4>(value); + default: break; + } + break; + } + default: break; } diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index 779cfc96a..bce98c1cb 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -28,10 +28,10 @@ SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0 #define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; } -class CUDAResource : public RefObject +class CUDAResource : public CUDAComputeUtil::ResourceBase { public: - typedef RefObject Super; + typedef CUDAComputeUtil::ResourceBase Super; /// Dtor ~CUDAResource() @@ -56,10 +56,10 @@ public: CUdeviceptr m_cudaMemory = CUdeviceptr(); }; -class CUDATextureResource : public RefObject +class CUDATextureResource : public CUDAComputeUtil::ResourceBase { public: - typedef RefObject Super; + typedef CUDAComputeUtil::ResourceBase Super; ~CUDATextureResource() { @@ -166,8 +166,6 @@ public: CUstream m_stream; }; - - static int _calcSMCountPerMultiProcessor(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine @@ -337,6 +335,396 @@ public: return SLANG_SUCCEEDED(context.init(0)); } +/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource) +{ + auto type = typeLayout->getType(); + auto shape = type->getResourceShape(); + + auto access = type->getResourceAccess(); + + CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; + auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; + + slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); + + const auto& textureDesc = srcEntry.textureDesc; + + // CUDA wants the unused dimensions to be 0. + // Might need to specially handle elsewhere + int width = textureDesc.size; + int height = 0; + int depth = 0; + + switch (baseShape) + { + case SLANG_TEXTURE_1D: + { + break; + } + case SLANG_TEXTURE_2D: + { + height = textureDesc.size; + break; + } + case SLANG_TEXTURE_3D: + { + height = textureDesc.size; + depth = textureDesc.size; + break; + } + case SLANG_TEXTURE_CUBE: + { + height = width; + depth = 1; + break; + } + default: + { + SLANG_ASSERT(!"Type not supported"); + return SLANG_FAIL; + } + } + + TextureData texData; + generateTextureData(texData, textureDesc); + + auto mipLevels = texData.mipLevels; + + RefPtr<CUDATextureResource> tex = new CUDATextureResource; + + size_t elementSize = 0; + + { + CUarray_format format = CU_AD_FORMAT_FLOAT; + int numChannels = 0; + + switch (textureDesc.format) + { + case Format::R_Float32: + { + format = CU_AD_FORMAT_FLOAT; + numChannels = 1; + elementSize = sizeof(float); + break; + } + case Format::RGBA_Unorm_UInt8: + { + format = CU_AD_FORMAT_UNSIGNED_INT8; + numChannels = 4; + elementSize = sizeof(uint32_t); + break; + } + default: + { + SLANG_ASSERT(!"Only support R_Float32/RGBA_Unorm_UInt8 formats for now"); + return SLANG_FAIL; + } + } + + if (mipLevels > 1) + { + resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; + + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Depth = depth; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + arrayDesc.Flags = 0; + + if (textureDesc.arrayLength > 1) + { + if (baseShape == SLANG_TEXTURE_1D || + baseShape == SLANG_TEXTURE_2D || + baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_LAYERED; + arrayDesc.Depth = textureDesc.arrayLength; + } + else + { + SLANG_ASSERT(!"Arrays only supported for 1D and 2D"); + return SLANG_FAIL; + } + } + + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + arrayDesc.Depth *= 6; + } + + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels)); + } + else + { + resourceType = CU_RESOURCE_TYPE_ARRAY; + + if (textureDesc.arrayLength > 1) + { + if (baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE) + { + SLANG_ASSERT(!"Only 1D, 2D and Cube arrays supported"); + return SLANG_FAIL; + } + + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + // Set the depth as the array length + arrayDesc.Depth = textureDesc.arrayLength; + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Depth *= 6; + } + + arrayDesc.Height = height; + arrayDesc.Width = width; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); + } + else if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) + { + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Depth = depth; + arrayDesc.Height = height; + arrayDesc.Width = width; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + arrayDesc.Flags = 0; + + // Handle cube texture + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Depth = 6; + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); + } + else + { + CUDA_ARRAY_DESCRIPTOR arrayDesc; + memset(&arrayDesc, 0, sizeof(arrayDesc)); + + arrayDesc.Width = width; + arrayDesc.Height = height; + arrayDesc.Format = format; + arrayDesc.NumChannels = numChannels; + + // Allocate the array, will work for 1D or 2D case + SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); + } + } + } + + // Work space for holding data for uploading if it needs to be rearranged + List<uint8_t> workspace; + + for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) + { + int mipWidth = width >> mipLevel; + int mipHeight = height >> mipLevel; + int mipDepth = depth >> mipLevel; + + mipWidth = (mipWidth == 0) ? 1 : mipWidth; + mipHeight = (mipHeight == 0) ? 1 : mipHeight; + mipDepth = (mipDepth == 0) ? 1 : mipDepth; + + // If it's a cubemap then the depth is always 6 + if (baseShape == SLANG_TEXTURE_CUBE) + { + mipDepth = 6; + } + + auto dstArray = tex->m_cudaArray; + if (tex->m_cudaMipMappedArray) + { + // Get the array for the mip level + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel)); + } + SLANG_ASSERT(dstArray); + + + // Check using the desc to see if it's plausible + { + CUDA_ARRAY_DESCRIPTOR arrayDesc; + SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); + + SLANG_ASSERT(mipWidth == arrayDesc.Width); + SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0)); + } + + const void* srcDataPtr = nullptr; + + if (textureDesc.arrayLength > 1) + { + SLANG_ASSERT(baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE); + + // TODO(JS): Here I assume that arrays are just held contiguously within a 'face' + // This seems reasonable and works with the Copy3D. + const size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; + + Index faceCount = textureDesc.arrayLength; + if (baseShape == SLANG_TEXTURE_CUBE) + { + faceCount *= 6; + } + + const size_t mipSizeInBytes = faceSizeInBytes * faceCount; + workspace.setCount(mipSizeInBytes); + + // We need to add the face data from each mip + // We iterate over face count so we copy all of the cubemap faces + for (Index j = 0; j < faceCount; j++) + { + const auto& srcData = texData.dataBuffer[mipLevel + j * mipLevels]; + // Copy over to the workspace to make contiguous + ::memcpy(workspace.begin() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); + } + + srcDataPtr = workspace.getBuffer(); + } + else + { + if (baseShape == SLANG_TEXTURE_CUBE) + { + size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; + + workspace.setCount(faceSizeInBytes * 6); + + // Copy the data over to make contiguous + for (Index j = 0; j < 6; j++) + { + const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel]; + SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount()); + + ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); + } + + srcDataPtr = workspace.getBuffer(); + } + else + { + const auto& srcData = texData.dataBuffer[mipLevel]; + SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); + + srcDataPtr = srcData.getBuffer(); + } + } + + if (textureDesc.arrayLength > 1) + { + SLANG_ASSERT(baseShape == SLANG_TEXTURE_1D || baseShape == SLANG_TEXTURE_2D || baseShape == SLANG_TEXTURE_CUBE); + + CUDA_MEMCPY3D copyParam; + memset(©Param, 0, sizeof(copyParam)); + + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcDataPtr; + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + // Set the depth to the array length + copyParam.Depth = textureDesc.arrayLength; + + if (baseShape == SLANG_TEXTURE_CUBE) + { + copyParam.Depth *= 6; + } + + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(©Param)); + } + else + { + switch (baseShape) + { + case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_2D: + { + CUDA_MEMCPY2D copyParam; + memset(©Param, 0, sizeof(copyParam)); + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcDataPtr; + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(©Param)); + break; + } + case SLANG_TEXTURE_3D: + case SLANG_TEXTURE_CUBE: + { + CUDA_MEMCPY3D copyParam; + memset(©Param, 0, sizeof(copyParam)); + + copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.dstArray = dstArray; + + copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; + copyParam.srcHost = srcDataPtr; + copyParam.srcPitch = mipWidth * elementSize; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = mipHeight; + copyParam.Depth = mipDepth; + + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(©Param)); + break; + } + + default: + { + SLANG_ASSERT(!"Not implemented"); + break; + } + } + } + } + + // Set up texture sampling parameters, and create final texture obj + + { + CUDA_RESOURCE_DESC resDesc; + memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); + resDesc.resType = resourceType; + + if (tex->m_cudaArray) + { + resDesc.res.array.hArray = tex->m_cudaArray; + } + if (tex->m_cudaMipMappedArray) + { + resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; + } + + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); + texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); + } + + outResource = tex; + return SLANG_OK; +} + static SlangResult _compute(CUcontext context, CUmodule module, const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], CUDAComputeUtil::Context& outContext) { auto& bindSet = outContext.m_bindSet; @@ -388,10 +776,6 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp // Get the type kind, if typeLayout is not set we'll assume a 'constant buffer' will do slang::TypeReflection::Kind kind = typeLayout ? typeLayout->getKind() : slang::TypeReflection::Kind::ConstantBuffer; - // TODO(JS): - // Here we should be using information about what textures hold to create appropriate - // textures. For now we only support 2d textures that always return 1. - switch (kind) { case slang::TypeReflection::Kind::ConstantBuffer: @@ -408,299 +792,20 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto type = typeLayout->getType(); auto shape = type->getResourceShape(); - auto access = type->getResourceAccess(); - - CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; - auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; switch (baseShape) { - case SLANG_TEXTURE_1D: + case SLANG_TEXTURE_1D: case SLANG_TEXTURE_2D: case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: { - SLANG_ASSERT(value->m_userIndex >= 0); - auto& srcEntry = entries[value->m_userIndex]; - - slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); - - int count = 1; - if (typeReflection->getKind() == slang::TypeReflection::Kind::Vector) - { - count = int(typeReflection->getElementCount()); - } - - const auto& textureDesc = srcEntry.textureDesc; - - // CUDA wants the unused dimensions to be 0. - // Might need to specially handle elsewhere - int width = textureDesc.size; - int height = 0; - int depth = 0; - - switch (baseShape) - { - case SLANG_TEXTURE_1D: break; - case SLANG_TEXTURE_2D: - { - height = textureDesc.size; - break; - } - case SLANG_TEXTURE_3D: - { - height = textureDesc.size; - depth = textureDesc.size; - break; - } - case SLANG_TEXTURE_CUBE: - { - height = width; - depth = 6; - break; - } - } - - TextureData texData; - generateTextureData(texData, textureDesc); - - auto mipLevels = texData.mipLevels; - - RefPtr<CUDATextureResource> tex = new CUDATextureResource; - - size_t elementSize = 0; - - { - CUarray_format format = CU_AD_FORMAT_FLOAT; - int numChannels = 0; - - switch (textureDesc.format) - { - case Format::R_Float32: - { - format = CU_AD_FORMAT_FLOAT; - numChannels = 1; - elementSize = sizeof(float); - break; - } - case Format::RGBA_Unorm_UInt8: - { - format = CU_AD_FORMAT_UNSIGNED_INT8; - numChannels = 4; - elementSize = sizeof(uint32_t); - break; - } - default: - { - SLANG_ASSERT(!"Only support R_Float32/RGBA_Unorm_UInt8 formats for now"); - return SLANG_FAIL; - } - } - - if (mipLevels > 1) - { - resourceType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; - - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - arrayDesc.Width = width; - arrayDesc.Height = height; - arrayDesc.Depth = depth; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - arrayDesc.Flags = 0; - - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayCreate(&tex->m_cudaMipMappedArray, &arrayDesc, mipLevels)); - } - else - { - resourceType = CU_RESOURCE_TYPE_ARRAY; - - if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) - { - CUDA_ARRAY3D_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - // If we have a cubemap the depth is 6 - arrayDesc.Depth = depth; - arrayDesc.Height = height; - arrayDesc.Width = width; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - - arrayDesc.Flags = 0; - - if (baseShape == SLANG_TEXTURE_CUBE) - { - arrayDesc.Depth = 6; - arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; - } - - SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); - } - else - { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - memset(&arrayDesc, 0, sizeof(arrayDesc)); - - arrayDesc.Width = width; - arrayDesc.Height = height; - arrayDesc.Format = format; - arrayDesc.NumChannels = numChannels; - - // Allocate the array, will work for 1D or 2D case - SLANG_CUDA_RETURN_ON_FAIL(cuArrayCreate(&tex->m_cudaArray, &arrayDesc)); - } - } - } - - // Work space for holding data for uploading if it needs to be rearranged - List<uint8_t> workspace; - - for (int mipLevel = 0; mipLevel < mipLevels; ++mipLevel) - { - int mipWidth = width >> mipLevel; - int mipHeight = height >> mipLevel; - int mipDepth = depth >> mipLevel; - - mipWidth = (mipWidth == 0) ? 1 : mipWidth; - mipHeight = (mipHeight == 0) ? 1 : mipHeight; - mipDepth = (mipDepth == 0) ? 1 : mipDepth; - - // If it's a cubemap then the depth is always 6 - if (baseShape == SLANG_TEXTURE_CUBE) - { - mipDepth = 6; - } - - auto dstArray = tex->m_cudaArray; - if (tex->m_cudaMipMappedArray) - { - // Get the array for the mip level - SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&dstArray, tex->m_cudaMipMappedArray, mipLevel)); - } - SLANG_ASSERT(dstArray); - - - // Check using the desc to see if it's plausible - { - CUDA_ARRAY_DESCRIPTOR arrayDesc; - SLANG_CUDA_RETURN_ON_FAIL(cuArrayGetDescriptor(&arrayDesc, dstArray)); - - SLANG_ASSERT(mipWidth == arrayDesc.Width); - SLANG_ASSERT(mipHeight == arrayDesc.Height || (mipHeight == 1 && arrayDesc.Height == 0)); - } - - const void* srcDataPtr = nullptr; - - if (baseShape == SLANG_TEXTURE_CUBE) - { - size_t faceSizeInBytes = elementSize * mipWidth * mipHeight; - - workspace.setCount(faceSizeInBytes * 6); - - // Copy the data over to make contiguous - for (Index j = 0; j < 6; j++) - { - const auto& srcData = texData.dataBuffer[mipLevels * j + mipLevel]; - SLANG_ASSERT(mipWidth * mipHeight == srcData.getCount()); - - ::memcpy(workspace.getBuffer() + faceSizeInBytes * j, srcData.getBuffer(), faceSizeInBytes); - } - - srcDataPtr = workspace.getBuffer(); - } - else - { - const auto& srcData = texData.dataBuffer[mipLevel]; - SLANG_ASSERT(mipWidth * mipHeight * mipDepth == srcData.getCount()); - - srcDataPtr = srcData.getBuffer(); - } - - switch (baseShape) - { - case SLANG_TEXTURE_1D: - case SLANG_TEXTURE_2D: - { - CUDA_MEMCPY2D copyParam; - memset(©Param, 0, sizeof(copyParam)); - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = dstArray; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcDataPtr; - copyParam.srcPitch = mipWidth * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = mipHeight; - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(©Param)); - break; - } - case SLANG_TEXTURE_3D: - case SLANG_TEXTURE_CUBE: - { - CUDA_MEMCPY3D copyParam; - memset(©Param, 0, sizeof(copyParam)); - - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = dstArray; - - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = srcDataPtr; - copyParam.srcPitch = mipWidth * elementSize; - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = mipHeight; - copyParam.Depth = mipDepth; - - SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy3D(©Param)); - break; - } - - default: - { - SLANG_ASSERT(!"Not implemented"); - break; - } - } - } - - // set texture parameters - - { - CUDA_RESOURCE_DESC resDesc; - memset(&resDesc, 0, sizeof(CUDA_RESOURCE_DESC)); - resDesc.resType = resourceType; - - if (tex->m_cudaArray) - { - resDesc.res.array.hArray = tex->m_cudaArray; - } - if (tex->m_cudaMipMappedArray) - { - resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; - } - - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); - texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; - - SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); - } - - value->m_target = tex; + RefPtr<CUDAComputeUtil::ResourceBase> resource; + SLANG_RETURN_ON_FAIL(CUDAComputeUtil::createTextureResource(entries[value->m_userIndex], typeLayout, resource)); + value->m_target = resource; break; } - case SLANG_TEXTURE_BUFFER: { // Need a CUDA impl for these... diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index f1ca65502..58a5bba33 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -40,6 +40,13 @@ struct CUDAComputeUtil List<BindSet::Value*> m_buffers; }; + class ResourceBase : public RefObject + { + public: + }; + + static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource); + static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); static bool canCreateDevice(); |
