diff options
Diffstat (limited to 'tools/gfx/cuda/render-cuda.cpp')
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 232 |
1 files changed, 190 insertions, 42 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index 7abf51ef3..b97fc819c 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -242,6 +242,8 @@ public: CUarray m_cudaArray = CUarray(); CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray(); + void* m_cudaExternalMemory = nullptr; + RefPtr<CUDAContext> m_cudaContext; virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override @@ -1439,6 +1441,62 @@ public: return SLANG_OK; } + Result getCUDAFormat(Format format, CUarray_format* outFormat) + { + // TODO: Expand to cover all available formats that can be supported in CUDA + switch (format) + { + case Format::R32G32B32A32_FLOAT: + case Format::R32G32B32_FLOAT: + case Format::R32G32_FLOAT: + case Format::R32_FLOAT: + case Format::D32_FLOAT: + *outFormat = CU_AD_FORMAT_FLOAT; + return SLANG_OK; + case Format::R16G16B16A16_FLOAT: + case Format::R16G16_FLOAT: + case Format::R16_FLOAT: + *outFormat = CU_AD_FORMAT_HALF; + return SLANG_OK; + case Format::R32G32B32A32_UINT: + case Format::R32G32B32_UINT: + case Format::R32G32_UINT: + case Format::R32_UINT: + *outFormat = CU_AD_FORMAT_UNSIGNED_INT32; + return SLANG_OK; + case Format::R16G16B16A16_UINT: + case Format::R16G16_UINT: + case Format::R16_UINT: + *outFormat = CU_AD_FORMAT_UNSIGNED_INT16; + return SLANG_OK; + case Format::R8G8B8A8_UINT: + case Format::R8G8_UINT: + case Format::R8_UINT: + case Format::R8G8B8A8_UNORM: + *outFormat = CU_AD_FORMAT_UNSIGNED_INT8; + return SLANG_OK; + case Format::R32G32B32A32_SINT: + case Format::R32G32B32_SINT: + case Format::R32G32_SINT: + case Format::R32_SINT: + *outFormat = CU_AD_FORMAT_SIGNED_INT32; + return SLANG_OK; + case Format::R16G16B16A16_SINT: + case Format::R16G16_SINT: + case Format::R16_SINT: + *outFormat = CU_AD_FORMAT_SIGNED_INT16; + return SLANG_OK; + case Format::R8G8B8A8_SINT: + case Format::R8G8_SINT: + case Format::R8_SINT: + *outFormat = CU_AD_FORMAT_SIGNED_INT8; + return SLANG_OK; + default: + SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now"); + return SLANG_FAIL; + } + } + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( const ITextureResource::Desc& desc, const ITextureResource::SubresourceData* initData, @@ -1489,45 +1547,34 @@ public: { CUarray_format format = CU_AD_FORMAT_FLOAT; int numChannels = 0; - - switch (desc.format) + + SLANG_RETURN_ON_FAIL(getCUDAFormat(desc.format, &format)); + FormatInfo info; + gfxGetFormatInfo(desc.format, &info); + numChannels = info.channelCount; + + switch (format) { - case Format::R32G32B32A32_FLOAT: - case Format::R32G32B32_FLOAT: - case Format::R32G32_FLOAT: - case Format::R32_FLOAT: - case Format::D32_FLOAT: - { - FormatInfo info; - gfxGetFormatInfo(desc.format, &info); - format = CU_AD_FORMAT_FLOAT; - numChannels = info.channelCount; - elementSize = sizeof(float) * numChannels; - break; - } - case Format::R16G16B16A16_FLOAT: - case Format::R16G16_FLOAT: - case Format::R16_FLOAT: - { - FormatInfo info; - gfxGetFormatInfo(desc.format, &info); - format = CU_AD_FORMAT_HALF; - numChannels = info.channelCount; - elementSize = sizeof(uint16_t) * numChannels; - break; - } - case Format::R8G8B8A8_UNORM: - { - format = CU_AD_FORMAT_UNSIGNED_INT8; - numChannels = 4; - elementSize = sizeof(uint32_t); - break; - } + case CU_AD_FORMAT_FLOAT: + { + elementSize = sizeof(float) * numChannels; + break; + } + case CU_AD_FORMAT_HALF: + { + elementSize = sizeof(uint16_t) * numChannels; + break; + } + case CU_AD_FORMAT_UNSIGNED_INT8: + { + elementSize = sizeof(uint32_t) * numChannels; + break; + } default: - { - SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now"); - return SLANG_FAIL; - } + { + SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now"); + return SLANG_FAIL; + } } if (desc.numMipLevels > 1) @@ -1951,6 +1998,83 @@ public: return SLANG_OK; } + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromSharedHandle( + InteropHandle handle, + const ITextureResource::Desc& desc, + const size_t size, + ITextureResource** outResource) override + { + if (handle.handleValue == 0) + { + *outResource = nullptr; + return SLANG_OK; + } + + RefPtr<TextureCUDAResource> resource = new TextureCUDAResource(desc); + resource->m_cudaContext = m_context; + + // CUDA manages sharing of buffers through the idea of an + // "external memory" object, which represents the relationship + // with another API's objects. In order to create this external + // memory association, we first need to fill in a descriptor struct. + CUDA_EXTERNAL_MEMORY_HANDLE_DESC externalMemoryHandleDesc; + memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc)); + switch (handle.api) + { + case InteropHandleAPI::D3D12: + externalMemoryHandleDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE; + break; + case InteropHandleAPI::Vulkan: + externalMemoryHandleDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32; + break; + default: + return SLANG_FAIL; + } + externalMemoryHandleDesc.handle.win32.handle = (void*)handle.handleValue; + externalMemoryHandleDesc.size = size; + externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated; + + CUexternalMemory externalMemory; + SLANG_CUDA_RETURN_ON_FAIL(cuImportExternalMemory(&externalMemory, &externalMemoryHandleDesc)); + resource->m_cudaExternalMemory = externalMemory; + + FormatInfo formatInfo; + SLANG_RETURN_ON_FAIL(gfxGetFormatInfo(desc.format, &formatInfo)); + CUDA_ARRAY3D_DESCRIPTOR arrayDesc; + arrayDesc.Depth = desc.size.depth; + arrayDesc.Height = desc.size.height; + arrayDesc.Width = desc.size.width; + arrayDesc.NumChannels = formatInfo.channelCount; + getCUDAFormat(desc.format, &arrayDesc.Format); + arrayDesc.Flags = 0; // TODO: Flags? CUDA_ARRAY_LAYERED/SURFACE_LDST/CUBEMAP/TEXTURE_GATHER + + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC externalMemoryMipDesc; + memset(&externalMemoryMipDesc, 0, sizeof(externalMemoryMipDesc)); + externalMemoryMipDesc.offset = 0; + externalMemoryMipDesc.arrayDesc = arrayDesc; + externalMemoryMipDesc.numLevels = desc.numMipLevels; + + CUmipmappedArray mipArray; + SLANG_CUDA_RETURN_ON_FAIL(cuExternalMemoryGetMappedMipmappedArray(&mipArray, externalMemory, &externalMemoryMipDesc)); + resource->m_cudaMipMappedArray = mipArray; + + CUarray cuArray; + SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&cuArray, mipArray, 0)); + resource->m_cudaArray = cuArray; + + CUDA_RESOURCE_DESC surfDesc; + memset(&surfDesc, 0, sizeof(surfDesc)); + surfDesc.resType = CU_RESOURCE_TYPE_ARRAY; + surfDesc.res.array.hArray = cuArray; + + CUsurfObject surface; + SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&surface, &surfDesc)); + resource->m_cudaSurfObj = surface; + + returnComPtr(outResource, resource); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) override { @@ -2183,12 +2307,36 @@ public: size_t* outRowPitch, size_t* outPixelSize) override { - SLANG_UNUSED(texture); - SLANG_UNUSED(outBlob); - SLANG_UNUSED(outRowPitch); - SLANG_UNUSED(outPixelSize); + auto textureImpl = static_cast<TextureCUDAResource*>(texture); + RefPtr<ListBlob> blob = new ListBlob(); - return SLANG_E_NOT_AVAILABLE; + auto desc = textureImpl->getDesc(); + auto width = desc->size.width; + auto height = desc->size.height; + FormatInfo sizeInfo; + SLANG_RETURN_ON_FAIL(gfxGetFormatInfo(desc->format, &sizeInfo)); + size_t pixelSize = sizeInfo.blockSizeInBytes / sizeInfo.pixelsPerBlock; + size_t rowPitch = width * pixelSize; + size_t size = height * rowPitch; + blob->m_data.setCount((Index)size); + + CUDA_MEMCPY2D copyParam; + memset(©Param, 0, sizeof(copyParam)); + + copyParam.srcMemoryType = CU_MEMORYTYPE_ARRAY; + copyParam.srcArray = textureImpl->m_cudaArray; + + copyParam.dstMemoryType = CU_MEMORYTYPE_HOST; + copyParam.dstHost = blob->m_data.getBuffer(); + copyParam.dstPitch = rowPitch; + copyParam.WidthInBytes = copyParam.dstPitch; + copyParam.Height = height; + SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(©Param)); + + *outRowPitch = rowPitch; + *outPixelSize = pixelSize; + returnComPtr(outBlob, blob); + return SLANG_OK; } virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource( |
