From 1f401d04e32c6feaeb35243ea5bfc2b14520344b Mon Sep 17 00:00:00 2001 From: jsmall-nvidia Date: Thu, 20 Feb 2020 18:24:00 -0500 Subject: WIP on RWTexture types on CUDA/CPU (#1234) * CUDA support for array of resources. * * Add support for Texture2DArray on CPU * Expand texture-simple.slang to test Texture2DArray * Reorganise CUDAComputeUtil to split out createTextureResource. * Add TextureCubeArray support for CPU/CUDA targets. * Pulled out CUDAResource Renamed derived classes to reflect that change. * Creation of SurfObject type. * Functions to return read/write access for simplifying future additions. * WIP for RWTexture access on CPU/CUDA. * CUsurfObject cannot have mips. * Ability to set number of mips on test data. Preliminary support for CUsurfObj and RWTexture1D on CUDA. CUDA docs improvements. * Fix typo. --- tools/render-test/cpu-compute-util.cpp | 66 ++++++++++--- tools/render-test/cuda/cuda-compute-util.cpp | 139 +++++++++++++++++++-------- tools/render-test/cuda/cuda-compute-util.h | 15 +-- tools/render-test/shader-input-layout.cpp | 13 ++- tools/render-test/shader-input-layout.h | 1 + 5 files changed, 173 insertions(+), 61 deletions(-) (limited to 'tools/render-test') diff --git a/tools/render-test/cpu-compute-util.cpp b/tools/render-test/cpu-compute-util.cpp index 608da9461..3826ccec1 100644 --- a/tools/render-test/cpu-compute-util.cpp +++ b/tools/render-test/cpu-compute-util.cpp @@ -247,19 +247,61 @@ struct ValueTextureCubeArray : public CPUComputeUtil::Resource, public CPPPrelud float m_value; }; -static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int elemCount, float value) + +template +struct ValueRWTexture1D : public CPUComputeUtil::Resource, public CPPPrelude::IRWTexture1D +{ + void set(void* out) + { + float* dst = (float*)out; + for (int i = 0; i < COUNT; ++i) + { + dst[i] = m_value; + } + } + + virtual void Load(int32_t loc, void* out) SLANG_OVERRIDE + { + set(out); + } + + ValueRWTexture1D(float value) : + 
m_value(value) + { + m_interface = static_cast(this); + } + + float m_value; +}; + + +static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, SlangResourceAccess access, Index elemCount, float value) { switch (shape) { case SLANG_TEXTURE_1D: { - switch (elemCount) + if (access == SLANG_RESOURCE_ACCESS_READ_WRITE) { - case 1: return new ValueTexture1D<1>(value); - case 2: return new ValueTexture1D<2>(value); - case 3: return new ValueTexture1D<3>(value); - case 4: return new ValueTexture1D<4>(value); - default: break; + switch (elemCount) + { + case 1: return new ValueRWTexture1D<1>(value); + case 2: return new ValueRWTexture1D<2>(value); + case 3: return new ValueRWTexture1D<3>(value); + case 4: return new ValueRWTexture1D<4>(value); + default: break; + } + } + else + { + switch (elemCount) + { + case 1: return new ValueTexture1D<1>(value); + case 2: return new ValueTexture1D<2>(value); + case 3: return new ValueTexture1D<3>(value); + case 4: return new ValueTexture1D<4>(value); + default: break; + } } break; } @@ -388,7 +430,7 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int auto type = typeLayout->getType(); auto shape = type->getResourceShape(); - //auto access = type->getResourceAccess(); + auto access = type->getResourceAccess(); auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; switch (baseShape) @@ -407,22 +449,22 @@ static CPUComputeUtil::Resource* _newValueTexture(SlangResourceShape shape, int slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); - int count = 1; + Index count = 1; if (typeReflection->getKind() == slang::TypeReflection::Kind::Vector) { - count = int(typeReflection->getElementCount()); + count = Index(typeReflection->getElementCount()); } switch (srcEntry.textureDesc.content) { case InputTextureContent::One: { - value->m_target = _newValueTexture(shape, count, 1.0f); + value->m_target = _newValueTexture(shape, access, count, 1.0f); break; } case 
InputTextureContent::Zero: { - value->m_target = _newValueTexture(shape, count, 0.0f); + value->m_target = _newValueTexture(shape, access, count, 0.0f); break; } default: break; diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index bce98c1cb..b21b22b30 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -28,13 +28,13 @@ SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0 #define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; } -class CUDAResource : public CUDAComputeUtil::ResourceBase +class MemoryCUDAResource : public CUDAResource { public: - typedef CUDAComputeUtil::ResourceBase Super; + typedef CUDAResource Super; /// Dtor - ~CUDAResource() + ~MemoryCUDAResource() { if (m_cudaMemory) { @@ -42,27 +42,31 @@ public: } } - static CUDAResource* getCUDAResource(BindSet::Value* value) + static MemoryCUDAResource* asResource(BindSet::Value* value) { - return value ? dynamic_cast(value->m_target.Ptr()) : nullptr; + return value ? dynamic_cast(value->m_target.Ptr()) : nullptr; } - /// Helper function to get the cuda memory pointer when given a value + /// Helper function to get the CUDA memory pointer when given a value static CUdeviceptr getCUDAData(BindSet::Value* value) { - auto resource = getCUDAResource(value); + auto resource = asResource(value); return resource ? 
resource->m_cudaMemory : CUdeviceptr(); } CUdeviceptr m_cudaMemory = CUdeviceptr(); }; -class CUDATextureResource : public CUDAComputeUtil::ResourceBase +class TextureCUDAResource : public CUDAResource { public: - typedef CUDAComputeUtil::ResourceBase Super; + typedef CUDAResource Super; - ~CUDATextureResource() + ~TextureCUDAResource() { + if (m_cudaSurfObj) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuSurfObjectDestroy(m_cudaSurfObj)); + } if (m_cudaTexObj) { SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj)); @@ -77,20 +81,30 @@ public: } } - static CUDATextureResource* getCUDATextureResource(BindSet::Value* value) + static TextureCUDAResource* asResource(BindSet::Value* value) { - return value ? dynamic_cast(value->m_target.Ptr()) : nullptr; + return value ? dynamic_cast(value->m_target.Ptr()) : nullptr; } - static CUtexObject getCUDATexObject(BindSet::Value* value) + static CUtexObject getTexObject(BindSet::Value* value) { - auto resource = getCUDATextureResource(value); + auto resource = asResource(value); // It's an assumption here that 0 is okay for null. Seems to work... return resource ? resource->m_cudaTexObj : CUtexObject(0); } - // This is an opaque type, that's backed by a long long + static CUsurfObject getSurfObject(BindSet::Value* value) + { + auto resource = asResource(value); + return resource ? resource->m_cudaSurfObj : CUsurfObject(0); + } + + // The texObject is for reading 'texture' like things. This is an opaque type, that's backed by a long long CUtexObject m_cudaTexObj = CUtexObject(); + + // The surfObj is for reading/writing 'texture like' things, but not for sampling. 
+ CUsurfObject m_cudaSurfObj = CUsurfObject(); + CUarray m_cudaArray = CUarray(); CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); }; @@ -335,20 +349,42 @@ public: return SLANG_SUCCEEDED(context.init(0)); } -/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr& outResource) +static bool _hasReadAccess(SlangResourceAccess access) +{ + return access == SLANG_RESOURCE_ACCESS_READ || access == SLANG_RESOURCE_ACCESS_READ_WRITE; +} + +static bool _hasWriteAccess(SlangResourceAccess access) +{ + return access == SLANG_RESOURCE_ACCESS_READ_WRITE; +} + +/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr& outResource) { auto type = typeLayout->getType(); auto shape = type->getResourceShape(); auto access = type->getResourceAccess(); + if (!(access == SLANG_RESOURCE_ACCESS_READ || + access == SLANG_RESOURCE_ACCESS_READ_WRITE)) + { + SLANG_ASSERT(!"Only read or read write currently supported"); + return SLANG_FAIL; + } + CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); - const auto& textureDesc = srcEntry.textureDesc; + InputTextureDesc textureDesc = srcEntry.textureDesc; + if (_hasWriteAccess(access)) + { + textureDesc.mipMapCount = 1; + } + // CUDA wants the unused dimensions to be 0. 
// Might need to specially handle elsewhere int width = textureDesc.size; @@ -384,13 +420,13 @@ public: return SLANG_FAIL; } } - + TextureData texData; generateTextureData(texData, textureDesc); auto mipLevels = texData.mipLevels; - RefPtr tex = new CUDATextureResource; + RefPtr tex = new TextureCUDAResource; size_t elementSize = 0; @@ -486,6 +522,11 @@ public: arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); } else if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) @@ -553,7 +594,6 @@ public: } SLANG_ASSERT(dstArray); - // Check using the desc to see if it's plausible { CUDA_ARRAY_DESCRIPTOR arrayDesc; @@ -710,15 +750,25 @@ public: resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; } - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); - texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + if (_hasWriteAccess(access)) + { + // If has write access it's effectively UAV, and so doesn't have sampling available + SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc)); + } + else + { + // If read only it's a SRV and can sample, but cannot write + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); + texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); + } - 
SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); } outResource = tex; @@ -782,7 +832,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case slang::TypeReflection::Kind::ParameterBlock: { // We can construct the buffers. We can't copy into yet, as we need to set all of the bindings first - RefPtr resource = new CUDAResource; + RefPtr resource = new MemoryCUDAResource; SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); value->m_target = resource; break; @@ -801,7 +851,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: { - RefPtr resource; + RefPtr resource; SLANG_RETURN_ON_FAIL(CUDAComputeUtil::createTextureResource(entries[value->m_userIndex], typeLayout, resource)); value->m_target = resource; break; @@ -817,7 +867,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_STRUCTURED_BUFFER: { // On CPU we just use the memory in the BindSet buffer, so don't need to create anything - RefPtr resource = new CUDAResource; + RefPtr resource = new MemoryCUDAResource; SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); value->m_target = resource; break; @@ -853,7 +903,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp if (elementCount == 0) { CUDAComputeUtil::Array array = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { array.data = resource->m_cudaMemory; @@ -868,7 +918,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case slang::TypeReflection::Kind::ParameterBlock: { // These map down to just pointers - *location.getUniform() = CUDAResource::getCUDAData(value); + *location.getUniform() = MemoryCUDAResource::getCUDAData(value); 
break; } case slang::TypeReflection::Kind::Resource: @@ -876,14 +926,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto type = typeLayout->getType(); auto shape = type->getResourceShape(); - //auto access = type->getResourceAccess(); + auto access = type->getResourceAccess(); switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) { case SLANG_STRUCTURED_BUFFER: { CUDAComputeUtil::StructuredBuffer buffer = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { buffer.data = resource->m_cudaMemory; @@ -897,7 +947,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp { CUDAComputeUtil::ByteAddressBuffer buffer = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { buffer.data = resource->m_cudaMemory; @@ -912,7 +962,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: { - *location.getUniform() = CUDATextureResource::getCUDATexObject(value); + if (_hasWriteAccess(access)) + { + *location.getUniform() = TextureCUDAResource::getSurfObject(value); + } + else + { + *location.getUniform() = TextureCUDAResource::getTexObject(value); + } break; } @@ -929,7 +986,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp const auto& values = bindSet.getValues(); for (BindSet::Value* value : values) { - CUdeviceptr cudaMem = CUDAResource::getCUDAData(value); + CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); if (value && value->m_data && cudaMem) { // Okay copy the data over... 
@@ -950,8 +1007,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp SLANG_CUDA_RETURN_ON_FAIL(cuFuncGetAttribute(&sharedSizeInBytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel)); // Work out the args - CUdeviceptr uniformCUDAData = CUDAResource::getCUDAData(bindRoot.getRootValue()); - CUdeviceptr entryPointCUDAData = CUDAResource::getCUDAData(bindRoot.getEntryPointValue()); + CUdeviceptr uniformCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getRootValue()); + CUdeviceptr entryPointCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getEntryPointValue()); // NOTE! These are pointers to the cuda memory pointers void* args[] = { &entryPointCUDAData , &uniformCUDAData }; @@ -987,7 +1044,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp if (entry.isOutput) { // Copy back to CPU memory - CUdeviceptr cudaMem = CUDAResource::getCUDAData(value); + CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); if (value && value->m_data && cudaMem) { // Okay copy the data back... diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index 58a5bba33..f15c9d4e3 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -8,10 +8,16 @@ namespace renderer_test { +// Base class for CUDA resources. This includes textures but also +// memory allocations +class CUDAResource : public RefObject +{ +public: +}; struct CUDAComputeUtil { - // Define here, so we don't need to include the cude header + // Define here, so we don't need to include the CUDA header typedef size_t CUdeviceptr; /// NOTE! 
MUST match up to definitions in the CUDA prelude @@ -40,12 +46,7 @@ struct CUDAComputeUtil List m_buffers; }; - class ResourceBase : public RefObject - { - public: - }; - - static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr& outResource); + static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr& outResource); static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); diff --git a/tools/render-test/shader-input-layout.cpp b/tools/render-test/shader-input-layout.cpp index 108483a2a..f9d6a60e1 100644 --- a/tools/render-test/shader-input-layout.cpp +++ b/tools/render-test/shader-input-layout.cpp @@ -452,6 +452,12 @@ namespace renderer_test entry.textureDesc.format = format; entry.bufferDesc.format = format; } + else if(word == "mipMaps") + { + parser.Read("="); + entry.textureDesc.mipMapCount = int(parser.ReadInt()); + } + if (parser.LookAhead(",")) parser.Read(","); else @@ -974,7 +980,12 @@ namespace renderer_test arraySize *= 6; output.arraySize = arraySize; output.textureSize = inputDesc.size; - output.mipLevels = Math::Log2Floor(output.textureSize) + 1; + + const Index maxMipLevels = Math::Log2Floor(output.textureSize) + 1; + Index mipLevels = (inputDesc.mipMapCount <= 0) ? maxMipLevels : inputDesc.mipMapCount; + mipLevels = (mipLevels > maxMipLevels) ? 
maxMipLevels : mipLevels; + + output.mipLevels = int(mipLevels); output.dataBuffer.setCount(output.mipLevels * output.arraySize); int slice = 0; diff --git a/tools/render-test/shader-input-layout.h b/tools/render-test/shader-input-layout.h index a9d525d47..0831f73bb 100644 --- a/tools/render-test/shader-input-layout.h +++ b/tools/render-test/shader-input-layout.h @@ -33,6 +33,7 @@ struct InputTextureDesc bool isDepthTexture = false; bool isRWTexture = false; int size = 4; + int mipMapCount = 0; ///< 0 means the maximum number of mips will be bound Format format = Format::RGBA_Unorm_UInt8; -- cgit v1.2.3