summaryrefslogtreecommitdiffstats
path: root/tools/gfx/cuda
diff options
context:
space:
mode:
authorlucy96chen <47800040+lucy96chen@users.noreply.github.com>2021-12-08 11:38:14 -0800
committerGitHub <noreply@github.com>2021-12-08 11:38:14 -0800
commit9606401e1de1002e3ad070bc5c6384fa5bc4d9ff (patch)
tree7a557a14fadf5220a34f6df9faf1a5535100743f /tools/gfx/cuda
parent90d8af888b40c83b33f9f0c037bd2ab8c19a35f4 (diff)
D3D12 and Vulkan to CUDA Texture Sharing (#2038)
Diffstat (limited to 'tools/gfx/cuda')
-rw-r--r--tools/gfx/cuda/render-cuda.cpp232
1 files changed, 190 insertions, 42 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index 7abf51ef3..b97fc819c 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -242,6 +242,8 @@ public:
CUarray m_cudaArray = CUarray();
CUmipmappedArray m_cudaMipMappedArray = CUmipmappedArray();
+ void* m_cudaExternalMemory = nullptr;
+
RefPtr<CUDAContext> m_cudaContext;
virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) override
@@ -1439,6 +1441,62 @@ public:
return SLANG_OK;
}
+ Result getCUDAFormat(Format format, CUarray_format* outFormat)
+ {
+ // TODO: Expand to cover all available formats that can be supported in CUDA
+ switch (format)
+ {
+ case Format::R32G32B32A32_FLOAT:
+ case Format::R32G32B32_FLOAT:
+ case Format::R32G32_FLOAT:
+ case Format::R32_FLOAT:
+ case Format::D32_FLOAT:
+ *outFormat = CU_AD_FORMAT_FLOAT;
+ return SLANG_OK;
+ case Format::R16G16B16A16_FLOAT:
+ case Format::R16G16_FLOAT:
+ case Format::R16_FLOAT:
+ *outFormat = CU_AD_FORMAT_HALF;
+ return SLANG_OK;
+ case Format::R32G32B32A32_UINT:
+ case Format::R32G32B32_UINT:
+ case Format::R32G32_UINT:
+ case Format::R32_UINT:
+ *outFormat = CU_AD_FORMAT_UNSIGNED_INT32;
+ return SLANG_OK;
+ case Format::R16G16B16A16_UINT:
+ case Format::R16G16_UINT:
+ case Format::R16_UINT:
+ *outFormat = CU_AD_FORMAT_UNSIGNED_INT16;
+ return SLANG_OK;
+ case Format::R8G8B8A8_UINT:
+ case Format::R8G8_UINT:
+ case Format::R8_UINT:
+ case Format::R8G8B8A8_UNORM:
+ *outFormat = CU_AD_FORMAT_UNSIGNED_INT8;
+ return SLANG_OK;
+ case Format::R32G32B32A32_SINT:
+ case Format::R32G32B32_SINT:
+ case Format::R32G32_SINT:
+ case Format::R32_SINT:
+ *outFormat = CU_AD_FORMAT_SIGNED_INT32;
+ return SLANG_OK;
+ case Format::R16G16B16A16_SINT:
+ case Format::R16G16_SINT:
+ case Format::R16_SINT:
+ *outFormat = CU_AD_FORMAT_SIGNED_INT16;
+ return SLANG_OK;
+ case Format::R8G8B8A8_SINT:
+ case Format::R8G8_SINT:
+ case Format::R8_SINT:
+ *outFormat = CU_AD_FORMAT_SIGNED_INT8;
+ return SLANG_OK;
+ default:
+ SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now");
+ return SLANG_FAIL;
+ }
+ }
+
virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource(
const ITextureResource::Desc& desc,
const ITextureResource::SubresourceData* initData,
@@ -1489,45 +1547,34 @@ public:
{
CUarray_format format = CU_AD_FORMAT_FLOAT;
int numChannels = 0;
-
- switch (desc.format)
+
+ SLANG_RETURN_ON_FAIL(getCUDAFormat(desc.format, &format));
+ FormatInfo info;
+ gfxGetFormatInfo(desc.format, &info);
+ numChannels = info.channelCount;
+
+ switch (format)
{
- case Format::R32G32B32A32_FLOAT:
- case Format::R32G32B32_FLOAT:
- case Format::R32G32_FLOAT:
- case Format::R32_FLOAT:
- case Format::D32_FLOAT:
- {
- FormatInfo info;
- gfxGetFormatInfo(desc.format, &info);
- format = CU_AD_FORMAT_FLOAT;
- numChannels = info.channelCount;
- elementSize = sizeof(float) * numChannels;
- break;
- }
- case Format::R16G16B16A16_FLOAT:
- case Format::R16G16_FLOAT:
- case Format::R16_FLOAT:
- {
- FormatInfo info;
- gfxGetFormatInfo(desc.format, &info);
- format = CU_AD_FORMAT_HALF;
- numChannels = info.channelCount;
- elementSize = sizeof(uint16_t) * numChannels;
- break;
- }
- case Format::R8G8B8A8_UNORM:
- {
- format = CU_AD_FORMAT_UNSIGNED_INT8;
- numChannels = 4;
- elementSize = sizeof(uint32_t);
- break;
- }
+ case CU_AD_FORMAT_FLOAT:
+ {
+ elementSize = sizeof(float) * numChannels;
+ break;
+ }
+ case CU_AD_FORMAT_HALF:
+ {
+ elementSize = sizeof(uint16_t) * numChannels;
+ break;
+ }
+ case CU_AD_FORMAT_UNSIGNED_INT8:
+ {
+ elementSize = sizeof(uint32_t) * numChannels;
+ break;
+ }
default:
- {
- SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now");
- return SLANG_FAIL;
- }
+ {
+ SLANG_ASSERT(!"Only support R32_FLOAT/R8G8B8A8_UNORM formats for now");
+ return SLANG_FAIL;
+ }
}
if (desc.numMipLevels > 1)
@@ -1951,6 +1998,83 @@ public:
return SLANG_OK;
}
+ virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromSharedHandle(
+ InteropHandle handle,
+ const ITextureResource::Desc& desc,
+ const size_t size,
+ ITextureResource** outResource) override
+ {
+ if (handle.handleValue == 0)
+ {
+ *outResource = nullptr;
+ return SLANG_OK;
+ }
+
+ RefPtr<TextureCUDAResource> resource = new TextureCUDAResource(desc);
+ resource->m_cudaContext = m_context;
+
+ // CUDA manages sharing of buffers through the idea of an
+ // "external memory" object, which represents the relationship
+ // with another API's objects. In order to create this external
+ // memory association, we first need to fill in a descriptor struct.
+ CUDA_EXTERNAL_MEMORY_HANDLE_DESC externalMemoryHandleDesc;
+ memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc));
+ switch (handle.api)
+ {
+ case InteropHandleAPI::D3D12:
+ externalMemoryHandleDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE;
+ break;
+ case InteropHandleAPI::Vulkan:
+ externalMemoryHandleDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32;
+ break;
+ default:
+ return SLANG_FAIL;
+ }
+ externalMemoryHandleDesc.handle.win32.handle = (void*)handle.handleValue;
+ externalMemoryHandleDesc.size = size;
+ externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated;
+
+ CUexternalMemory externalMemory;
+ SLANG_CUDA_RETURN_ON_FAIL(cuImportExternalMemory(&externalMemory, &externalMemoryHandleDesc));
+ resource->m_cudaExternalMemory = externalMemory;
+
+ FormatInfo formatInfo;
+ SLANG_RETURN_ON_FAIL(gfxGetFormatInfo(desc.format, &formatInfo));
+ CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
+ arrayDesc.Depth = desc.size.depth;
+ arrayDesc.Height = desc.size.height;
+ arrayDesc.Width = desc.size.width;
+ arrayDesc.NumChannels = formatInfo.channelCount;
+ getCUDAFormat(desc.format, &arrayDesc.Format);
+ arrayDesc.Flags = 0; // TODO: Flags? CUDA_ARRAY_LAYERED/SURFACE_LDST/CUBEMAP/TEXTURE_GATHER
+
+ CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC externalMemoryMipDesc;
+ memset(&externalMemoryMipDesc, 0, sizeof(externalMemoryMipDesc));
+ externalMemoryMipDesc.offset = 0;
+ externalMemoryMipDesc.arrayDesc = arrayDesc;
+ externalMemoryMipDesc.numLevels = desc.numMipLevels;
+
+ CUmipmappedArray mipArray;
+ SLANG_CUDA_RETURN_ON_FAIL(cuExternalMemoryGetMappedMipmappedArray(&mipArray, externalMemory, &externalMemoryMipDesc));
+ resource->m_cudaMipMappedArray = mipArray;
+
+ CUarray cuArray;
+ SLANG_CUDA_RETURN_ON_FAIL(cuMipmappedArrayGetLevel(&cuArray, mipArray, 0));
+ resource->m_cudaArray = cuArray;
+
+ CUDA_RESOURCE_DESC surfDesc;
+ memset(&surfDesc, 0, sizeof(surfDesc));
+ surfDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ surfDesc.res.array.hArray = cuArray;
+
+ CUsurfObject surface;
+ SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&surface, &surfDesc));
+ resource->m_cudaSurfObj = surface;
+
+ returnComPtr(outResource, resource);
+ return SLANG_OK;
+ }
+
virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView(
ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) override
{
@@ -2183,12 +2307,36 @@ public:
size_t* outRowPitch,
size_t* outPixelSize) override
{
- SLANG_UNUSED(texture);
- SLANG_UNUSED(outBlob);
- SLANG_UNUSED(outRowPitch);
- SLANG_UNUSED(outPixelSize);
+ auto textureImpl = static_cast<TextureCUDAResource*>(texture);
+ RefPtr<ListBlob> blob = new ListBlob();
- return SLANG_E_NOT_AVAILABLE;
+ auto desc = textureImpl->getDesc();
+ auto width = desc->size.width;
+ auto height = desc->size.height;
+ FormatInfo sizeInfo;
+ SLANG_RETURN_ON_FAIL(gfxGetFormatInfo(desc->format, &sizeInfo));
+ size_t pixelSize = sizeInfo.blockSizeInBytes / sizeInfo.pixelsPerBlock;
+ size_t rowPitch = width * pixelSize;
+ size_t size = height * rowPitch;
+ blob->m_data.setCount((Index)size);
+
+ CUDA_MEMCPY2D copyParam;
+ memset(&copyParam, 0, sizeof(copyParam));
+
+ copyParam.srcMemoryType = CU_MEMORYTYPE_ARRAY;
+ copyParam.srcArray = textureImpl->m_cudaArray;
+
+ copyParam.dstMemoryType = CU_MEMORYTYPE_HOST;
+ copyParam.dstHost = blob->m_data.getBuffer();
+ copyParam.dstPitch = rowPitch;
+ copyParam.WidthInBytes = copyParam.dstPitch;
+ copyParam.Height = height;
+ SLANG_CUDA_RETURN_ON_FAIL(cuMemcpy2D(&copyParam));
+
+ *outRowPitch = rowPitch;
+ *outPixelSize = pixelSize;
+ returnComPtr(outBlob, blob);
+ return SLANG_OK;
}
virtual SLANG_NO_THROW Result SLANG_MCALL readBufferResource(