diff options
| author | lucy96chen <47800040+lucy96chen@users.noreply.github.com> | 2021-11-09 11:59:43 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-11-09 11:59:43 -0800 |
| commit | 4d4cd569ad7fcc88693c18f848603f18894e24be (patch) | |
| tree | 4c7fb036eea9e259d1610a933b448a5d0edcd918 /tools/gfx/cuda/render-cuda.cpp | |
| parent | 8fe3f9cd7d664fc98e33cf276427390b42b9b468 (diff) | |
Allow buffers to be shared between D3D12 and CUDA (#2005)
* Added both the SharedHandle struct containing a handle and the API the handle originated from and the getSharedHandle() method to IResource, which returns a Windows system handle for the resource that can then be shared between multiple APIs (currently only fully implemented for D3D12); Added createTextureFromNativeHandle() and createBufferFromNativeHandle() to IDevice, which creates a buffer or texture resource using the provided handle (currently only fully implemented for D3D12); Added createBufferFromSharedHandle() to IDevice, which creates a BufferResource using the provided system handle (currently only fully implemented for the D3D12 to CUDA interface); Provided a proper implementation for CUDADevice::getNativeHandle(); Added several new tests testing the aforementioned implementations; Moved NativeHandle and getNativeHandle() for IBufferResource and ITextureResource up a layer into IResource and renamed to NativeResourceHandle; Modified NativeResourceHandle to be a struct containing the handle and the API it originated from and propagated these changes where appropriate
* Combined all native and shared handle representations into a unified InteropHandle struct which tracks the handle's value and source API; Modified all getNativeHandle() and getSharedHandle() variants to operate on InteropHandle and modified all affected files
* D3D12 buffers and textures are now responsible for closing their shared handles if they exist; Renamed IDevice::getNativeHandle() to getNativeDeviceHandles()
* Fixed getNativeDeviceHandles() in render-cuda to match updated method elsewhere
* Temporarily disabling existingDeviceHandleCUDA and sharedHandleD3D12ToCUDA due to currently unreproducible test failures on TC
Diffstat (limited to 'tools/gfx/cuda/render-cuda.cpp')
| -rw-r--r-- | tools/gfx/cuda/render-cuda.cpp | 75 |
1 files changed, 70 insertions, 5 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp index c7b16fc66..2dc1bc1e1 100644 --- a/tools/gfx/cuda/render-cuda.cpp +++ b/tools/gfx/cuda/render-cuda.cpp @@ -186,6 +186,7 @@ public: uint64_t getBindlessHandle() { return (uint64_t)m_cudaMemory; } + void* m_cudaExternalMemory = nullptr; void* m_cudaMemory = nullptr; RefPtr<CUDAContext> m_cudaContext; @@ -195,9 +196,10 @@ public: return (DeviceAddress)m_cudaMemory; } - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(NativeHandle* outHandle) override + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override { - *outHandle = getBindlessHandle(); + outHandle->handleValue = getBindlessHandle(); + outHandle->api = InteropHandleAPI::CUDA; return SLANG_OK; } }; @@ -242,9 +244,10 @@ public: RefPtr<CUDAContext> m_cudaContext; - virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(NativeHandle* outHandle) override + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override { - *outHandle = getBindlessHandle(); + outHandle->handleValue = getBindlessHandle(); + outHandle->api = InteropHandleAPI::CUDA; return SLANG_OK; } }; @@ -892,6 +895,13 @@ private: String m_adapterName; public: + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeDeviceHandles(InteropHandles* outHandles) override + { + outHandles->handles[0].handleValue = (uint64_t)m_device; + outHandles->handles[0].api = InteropHandleAPI::CUDA; + return SLANG_OK; + } + class CommandQueueImpl; class CommandBufferImpl @@ -1332,6 +1342,8 @@ public: m_context = new CUDAContext(); + int count = -1; + cuDeviceGetCount(&count); SLANG_CUDA_RETURN_ON_FAIL(cuDeviceGet(&m_device, m_deviceIndex)); SLANG_CUDA_RETURN_WITH_REPORT_ON_FAIL( @@ -1810,6 +1822,59 @@ public: return SLANG_OK; } + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromSharedHandle( + InteropHandle handle, + const IBufferResource::Desc& desc, + IBufferResource** outResource) 
override + { + if (handle.handleValue == 0) + { + *outResource = nullptr; + return SLANG_OK; + } + + RefPtr<MemoryCUDAResource> resource = new MemoryCUDAResource(desc); + resource->m_cudaContext = m_context; + + // CUDA manages sharing of buffers through the idea of an + // "external memory" object, which represents the relationship + // with another API's objects. In order to create this external + // memory association, we first need to fill in a descriptor struct. + cudaExternalMemoryHandleDesc externalMemoryHandleDesc; + memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc)); + // TODO: Change according to the type of handle being passed in + externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource; + externalMemoryHandleDesc.handle.win32.handle = (void*)handle.handleValue; + externalMemoryHandleDesc.size = desc.sizeInBytes; + externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated; + + // Once we have filled in the descriptor, we can request + // that CUDA create the required association between the + // external buffer and its own memory. + cudaExternalMemory_t externalMemory; + SLANG_CUDA_RETURN_ON_FAIL(cudaImportExternalMemory(&externalMemory, &externalMemoryHandleDesc)); + resource->m_cudaExternalMemory = externalMemory; + + // The CUDA "external memory" handle is not itself a device + // pointer, so we need to query for a suitable device address + // for the buffer with another call. + // + // Just as for the external memory, we fill in a descriptor + // structure (although in this case we only need to specify + // the size). + cudaExternalMemoryBufferDesc bufferDesc; + memset(&bufferDesc, 0, sizeof(bufferDesc)); + bufferDesc.size = desc.sizeInBytes; + + // Finally, we can "map" the buffer to get a device address. 
+ void* deviceAddress; + SLANG_CUDA_RETURN_ON_FAIL(cudaExternalMemoryGetMappedBuffer(&deviceAddress, externalMemory, &bufferDesc)); + resource->m_cudaMemory = deviceAddress; + + returnComPtr(outResource, resource); + return SLANG_OK; + } + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) override { @@ -2157,7 +2222,7 @@ SlangResult SLANG_MCALL createCUDADevice(const IDevice::Desc* desc, IDevice** ou { SLANG_UNUSED(desc); *outDevice = nullptr; - return SLANG_OK; + return SLANG_FAIL; } #endif |
