diff options
| author | jsmall-nvidia <jsmall@nvidia.com> | 2020-02-20 18:24:00 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-02-20 15:24:00 -0800 |
| commit | 1f401d04e32c6feaeb35243ea5bfc2b14520344b (patch) | |
| tree | 64394bc2f9fbef3dec3237c69604a0277d019f3c /tools/render-test/cuda | |
| parent | f9d99fde581c7dfdeb46e87f32da1fed8ac5441c (diff) | |
WIP on RWTexture types on CUDA/CPU (#1234)
* CUDA support for array of resources.
* * Add support for Texture2DArray on CPU
* Expand texture-simple.slang to test Texture2DArray
* Reorganise CUDAComputeUtil to split out createTextureResource.
* Add TextureCubeArray support for CPU/CUDA targets.
* Pulled out CUDAResource
Renamed derived classes to reflect that change.
* Creation of SurfObject type.
* Functions to return read/write access for simplifying future additions.
* WIP for RWTexture access on CPU/CUDA.
* CUsurfObject cannot have mips.
* Ability to set number of mips on test data.
Preliminary support for CUsurfObj and RWTexture1D on CUDA.
CUDA docs improvements.
* Fix typo.
Diffstat (limited to 'tools/render-test/cuda')
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.cpp | 139 | ||||
| -rw-r--r-- | tools/render-test/cuda/cuda-compute-util.h | 15 |
2 files changed, 106 insertions, 48 deletions
diff --git a/tools/render-test/cuda/cuda-compute-util.cpp b/tools/render-test/cuda/cuda-compute-util.cpp index bce98c1cb..b21b22b30 100644 --- a/tools/render-test/cuda/cuda-compute-util.cpp +++ b/tools/render-test/cuda/cuda-compute-util.cpp @@ -28,13 +28,13 @@ SLANG_FORCE_INLINE static bool _isError(cudaError_t result) { return result != 0 #define SLANG_CUDA_ASSERT_ON_FAIL(x) { auto _res = x; if (_isError(_res)) { SLANG_ASSERT(!"Failed CUDA call"); }; } -class CUDAResource : public CUDAComputeUtil::ResourceBase +class MemoryCUDAResource : public CUDAResource { public: - typedef CUDAComputeUtil::ResourceBase Super; + typedef CUDAResource Super; /// Dtor - ~CUDAResource() + ~MemoryCUDAResource() { if (m_cudaMemory) { @@ -42,27 +42,31 @@ public: } } - static CUDAResource* getCUDAResource(BindSet::Value* value) + static MemoryCUDAResource* asResource(BindSet::Value* value) { - return value ? dynamic_cast<CUDAResource*>(value->m_target.Ptr()) : nullptr; + return value ? dynamic_cast<MemoryCUDAResource*>(value->m_target.Ptr()) : nullptr; } - /// Helper function to get the cuda memory pointer when given a value + /// Helper function to get the CUDA memory pointer when given a value static CUdeviceptr getCUDAData(BindSet::Value* value) { - auto resource = getCUDAResource(value); + auto resource = asResource(value); return resource ? resource->m_cudaMemory : CUdeviceptr(); } CUdeviceptr m_cudaMemory = CUdeviceptr(); }; -class CUDATextureResource : public CUDAComputeUtil::ResourceBase +class TextureCUDAResource : public CUDAResource { public: - typedef CUDAComputeUtil::ResourceBase Super; + typedef CUDAResource Super; - ~CUDATextureResource() + ~TextureCUDAResource() { + if (m_cudaSurfObj) + { + SLANG_CUDA_ASSERT_ON_FAIL(cuSurfObjectDestroy(m_cudaSurfObj)); + } if (m_cudaTexObj) { SLANG_CUDA_ASSERT_ON_FAIL(cuTexObjectDestroy(m_cudaTexObj)); @@ -77,20 +81,30 @@ public: } } - static CUDATextureResource* getCUDATextureResource(BindSet::Value* value) + static TextureCUDAResource* asResource(BindSet::Value* value) { - return value ? dynamic_cast<CUDATextureResource*>(value->m_target.Ptr()) : nullptr; + return value ? dynamic_cast<TextureCUDAResource*>(value->m_target.Ptr()) : nullptr; } - static CUtexObject getCUDATexObject(BindSet::Value* value) + static CUtexObject getTexObject(BindSet::Value* value) { - auto resource = getCUDATextureResource(value); + auto resource = asResource(value); // It's an assumption here that 0 is okay for null. Seems to work... return resource ? resource->m_cudaTexObj : CUtexObject(0); } - // This is an opaque type, that's backed by a long long + static CUsurfObject getSurfObject(BindSet::Value* value) + { + auto resource = asResource(value); + return resource ? resource->m_cudaSurfObj : CUsurfObject(0); + } + + // The texObject is for reading 'texture' like things. This is an opaque type, that's backed by a long long CUtexObject m_cudaTexObj = CUtexObject(); + + // The surfObj is for reading/writing 'texture like' things, but not for sampling. + CUsurfObject m_cudaSurfObj = CUsurfObject(); + CUarray m_cudaArray = CUarray(); CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); }; @@ -335,20 +349,42 @@ public: return SLANG_SUCCEEDED(context.init(0)); } -/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource) +static bool _hasReadAccess(SlangResourceAccess access) +{ + return access = SLANG_RESOURCE_ACCESS_READ || access == SLANG_RESOURCE_ACCESS_READ_WRITE; +} + +static bool _hasWriteAccess(SlangResourceAccess access) +{ + return access == SLANG_RESOURCE_ACCESS_READ_WRITE; +} + +/* static */SlangResult CUDAComputeUtil::createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource) { auto type = typeLayout->getType(); auto shape = type->getResourceShape(); auto access = type->getResourceAccess(); + if (!(access == SLANG_RESOURCE_ACCESS_READ || + access == SLANG_RESOURCE_ACCESS_READ_WRITE)) + { + SLANG_ASSERT(!"Only read or read write currently supported"); + return SLANG_FAIL; + } + CUresourcetype resourceType = CU_RESOURCE_TYPE_ARRAY; auto baseShape = shape & SLANG_RESOURCE_BASE_SHAPE_MASK; slang::TypeReflection* typeReflection = typeLayout->getResourceResultType(); - const auto& textureDesc = srcEntry.textureDesc; + InputTextureDesc textureDesc = srcEntry.textureDesc; + if (_hasWriteAccess(access)) + { + textureDesc.mipMapCount = 1; + } + // CUDA wants the unused dimensions to be 0. // Might need to specially handle elsewhere int width = textureDesc.size; @@ -384,13 +420,13 @@ public: return SLANG_FAIL; } } - + TextureData texData; generateTextureData(texData, textureDesc); auto mipLevels = texData.mipLevels; - RefPtr<CUDATextureResource> tex = new CUDATextureResource; + RefPtr<TextureCUDAResource> tex = new TextureCUDAResource; size_t elementSize = 0; @@ -486,6 +522,11 @@ public: arrayDesc.Format = format; arrayDesc.NumChannels = numChannels; + if (baseShape == SLANG_TEXTURE_CUBE) + { + arrayDesc.Flags |= CUDA_ARRAY3D_CUBEMAP; + } + SLANG_CUDA_RETURN_ON_FAIL(cuArray3DCreate(&tex->m_cudaArray, &arrayDesc)); } else if (baseShape == SLANG_TEXTURE_3D || baseShape == SLANG_TEXTURE_CUBE) @@ -553,7 +594,6 @@ public: } SLANG_ASSERT(dstArray); - // Check using the desc to see if it's plausible { CUDA_ARRAY_DESCRIPTOR arrayDesc; @@ -710,15 +750,25 @@ public: resDesc.res.mipmap.hMipmappedArray = tex->m_cudaMipMappedArray; } - CUDA_TEXTURE_DESC texDesc; - memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); - texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; - texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; - texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + if (_hasWriteAccess(access)) + { + // If has write access it's effectively UAV, and so doesn't have sampling available + SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc)); + } + else + { + // If read only it's a SRV and can sample, but cannot write + CUDA_TEXTURE_DESC texDesc; + memset(&texDesc, 0, sizeof(CUDA_TEXTURE_DESC)); + texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.addressMode[2] = CU_TR_ADDRESS_MODE_WRAP; + texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR; + texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; + + SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); + } - SLANG_CUDA_RETURN_ON_FAIL(cuTexObjectCreate(&tex->m_cudaTexObj, &resDesc, &texDesc, nullptr)); } outResource = tex; @@ -782,7 +832,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case slang::TypeReflection::Kind::ParameterBlock: { // We can construct the buffers. We can't copy into yet, as we need to set all of the bindings first - RefPtr<CUDAResource> resource = new CUDAResource; + RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource; SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); value->m_target = resource; break; @@ -801,7 +851,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: { - RefPtr<CUDAComputeUtil::ResourceBase> resource; + RefPtr<CUDAResource> resource; SLANG_RETURN_ON_FAIL(CUDAComputeUtil::createTextureResource(entries[value->m_userIndex], typeLayout, resource)); value->m_target = resource; break; @@ -817,7 +867,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_STRUCTURED_BUFFER: { // On CPU we just use the memory in the BindSet buffer, so don't need to create anything - RefPtr<CUDAResource> resource = new CUDAResource; + RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource; SLANG_CUDA_RETURN_ON_FAIL(cuMemAlloc(&resource->m_cudaMemory, value->m_sizeInBytes)); value->m_target = resource; break; @@ -853,7 +903,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp if (elementCount == 0) { CUDAComputeUtil::Array array = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { array.data = resource->m_cudaMemory; @@ -868,7 +918,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case slang::TypeReflection::Kind::ParameterBlock: { // These map down to just pointers - *location.getUniform<CUdeviceptr>() = CUDAResource::getCUDAData(value); + *location.getUniform<CUdeviceptr>() = MemoryCUDAResource::getCUDAData(value); break; } case slang::TypeReflection::Kind::Resource: @@ -876,14 +926,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp auto type = typeLayout->getType(); auto shape = type->getResourceShape(); - //auto access = type->getResourceAccess(); + auto access = type->getResourceAccess(); switch (shape & SLANG_RESOURCE_BASE_SHAPE_MASK) { case SLANG_STRUCTURED_BUFFER: { CUDAComputeUtil::StructuredBuffer buffer = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { buffer.data = resource->m_cudaMemory; @@ -897,7 +947,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp { CUDAComputeUtil::ByteAddressBuffer buffer = { CUdeviceptr(), 0 }; - auto resource = CUDAResource::getCUDAResource(value); + auto resource = MemoryCUDAResource::asResource(value); if (resource) { buffer.data = resource->m_cudaMemory; @@ -912,7 +962,14 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp case SLANG_TEXTURE_3D: case SLANG_TEXTURE_CUBE: { - *location.getUniform<CUtexObject>() = CUDATextureResource::getCUDATexObject(value); + if (_hasWriteAccess(access)) + { + *location.getUniform<CUsurfObject>() = TextureCUDAResource::getSurfObject(value); + } + else + { + *location.getUniform<CUtexObject>() = TextureCUDAResource::getTexObject(value); + } break; } @@ -929,7 +986,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp const auto& values = bindSet.getValues(); for (BindSet::Value* value : values) { - CUdeviceptr cudaMem = CUDAResource::getCUDAData(value); + CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); if (value && value->m_data && cudaMem) { // Okay copy the data over... @@ -950,8 +1007,8 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp SLANG_CUDA_RETURN_ON_FAIL(cuFuncGetAttribute(&sharedSizeInBytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel)); // Work out the args - CUdeviceptr uniformCUDAData = CUDAResource::getCUDAData(bindRoot.getRootValue()); - CUdeviceptr entryPointCUDAData = CUDAResource::getCUDAData(bindRoot.getEntryPointValue()); + CUdeviceptr uniformCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getRootValue()); + CUdeviceptr entryPointCUDAData = MemoryCUDAResource::getCUDAData(bindRoot.getEntryPointValue()); // NOTE! These are pointers to the cuda memory pointers void* args[] = { &entryPointCUDAData , &uniformCUDAData }; @@ -987,7 +1044,7 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp if (entry.isOutput) { // Copy back to CPU memory - CUdeviceptr cudaMem = CUDAResource::getCUDAData(value); + CUdeviceptr cudaMem = MemoryCUDAResource::getCUDAData(value); if (value && value->m_data && cudaMem) { // Okay copy the data back... diff --git a/tools/render-test/cuda/cuda-compute-util.h b/tools/render-test/cuda/cuda-compute-util.h index 58a5bba33..f15c9d4e3 100644 --- a/tools/render-test/cuda/cuda-compute-util.h +++ b/tools/render-test/cuda/cuda-compute-util.h @@ -8,10 +8,16 @@ namespace renderer_test { +// Base class for CUDA resources. This includes textures but also +// memory allocations +class CUDAResource : public RefObject +{ +public: +}; struct CUDAComputeUtil { - // Define here, so we don't need to include the cude header + // Define here, so we don't need to include the CUDA header typedef size_t CUdeviceptr; /// NOTE! MUST match up to definitions in the CUDA prelude @@ -40,12 +46,7 @@ struct CUDAComputeUtil List<BindSet::Value*> m_buffers; }; - class ResourceBase : public RefObject - { - public: - }; - - static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<ResourceBase>& outResource); + static SlangResult createTextureResource(const ShaderInputLayoutEntry& srcEntry, slang::TypeLayoutReflection* typeLayout, RefPtr<CUDAResource>& outResource); static SlangResult execute(const ShaderCompilerUtil::OutputAndLayout& outputAndLayout, const uint32_t dispatchSize[3], Context& outContext); |
