From ce6e946f6f4882aba8a62392ae791c948633e2e3 Mon Sep 17 00:00:00 2001 From: lucy96chen <47800040+lucy96chen@users.noreply.github.com> Date: Mon, 25 Jul 2022 13:36:43 -0700 Subject: Split render-cuda.cpp into smaller files (#2334) * render-cuda split, compile errors galore due to missing includes etc. * render-cuda split and fully compiles * Ran premake.bat to disable cuda; Added all new files * Removed render-cuda files * CI fixes * Rerun CI --- tools/gfx/cuda/cuda-shader-object.cpp | 349 ++++++++++++++++++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 tools/gfx/cuda/cuda-shader-object.cpp (limited to 'tools/gfx/cuda/cuda-shader-object.cpp') diff --git a/tools/gfx/cuda/cuda-shader-object.cpp b/tools/gfx/cuda/cuda-shader-object.cpp new file mode 100644 index 000000000..3fc55e401 --- /dev/null +++ b/tools/gfx/cuda/cuda-shader-object.cpp @@ -0,0 +1,349 @@ +// cuda-shader-object.cpp +#include "cuda-shader-object.h" + +#include "cuda-shader-object-layout.h" +#include "cuda-resource-views.h" + +#include "cuda-helper-functions.h" + +namespace gfx +{ +#ifdef GFX_ENABLE_CUDA +using namespace Slang; + +namespace cuda +{ +Result ShaderObjectData::setCount(Index count) +{ + if (isHostOnly) + { + m_cpuBuffer.setCount(count); + if (!m_bufferView) + { + IResourceView::Desc viewDesc = {}; + viewDesc.type = IResourceView::Type::UnorderedAccess; + m_bufferView = new ResourceViewImpl(); + m_bufferView->proxyBuffer = m_cpuBuffer.getBuffer(); + m_bufferView->m_desc = viewDesc; + } + return SLANG_OK; + } + + if (!m_bufferResource) + { + IBufferResource::Desc desc; + desc.type = IResource::Type::Buffer; + desc.sizeInBytes = count; + m_bufferResource = new BufferResourceImpl(desc); + if (count) + { + SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc(&m_bufferResource->m_cudaMemory, (size_t)count)); + } + IResourceView::Desc viewDesc = {}; + viewDesc.type = IResourceView::Type::UnorderedAccess; + m_bufferView = new ResourceViewImpl(); + m_bufferView->memoryResource = m_bufferResource; + m_bufferView->m_desc = viewDesc; + } + auto oldSize = m_bufferResource->getDesc()->sizeInBytes; + if ((size_t)count != oldSize) + { + void* newMemory = nullptr; + if (count) + { + SLANG_CUDA_RETURN_ON_FAIL(cudaMalloc(&newMemory, (size_t)count)); + } + if (oldSize) + { + SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy( + newMemory, + m_bufferResource->m_cudaMemory, + Math::Min((size_t)count, oldSize), + cudaMemcpyDefault)); + } + cudaFree(m_bufferResource->m_cudaMemory); + m_bufferResource->m_cudaMemory = newMemory; + m_bufferResource->getDesc()->sizeInBytes = count; + } + return SLANG_OK; +} + +Slang::Index ShaderObjectData::getCount() +{ + if (isHostOnly) + return m_cpuBuffer.getCount(); + if (m_bufferResource) + return (Slang::Index)(m_bufferResource->getDesc()->sizeInBytes); + else + return 0; +} + +void* ShaderObjectData::getBuffer() +{ + if (isHostOnly) + return m_cpuBuffer.getBuffer(); + + if (m_bufferResource) + return m_bufferResource->m_cudaMemory; + return nullptr; +} + +/// Returns a resource view for GPU access into the buffer content. +ResourceViewBase* ShaderObjectData::getResourceView( + RendererBase* device, + slang::TypeLayoutReflection* elementLayout, + slang::BindingType bindingType) +{ + SLANG_UNUSED(device); + m_bufferResource->getDesc()->elementSize = (int)elementLayout->getSize(); + return m_bufferView.Ptr(); +} + +SlangResult ShaderObjectImpl::init(IDevice* device, ShaderObjectLayoutImpl* typeLayout) +{ + m_layout = typeLayout; + + // If the layout tells us that there is any uniform data, + // then we need to allocate a constant buffer to hold that data. + // + // TODO: Do we need to allocate a shadow copy for use from + // the CPU? + // + // TODO: When/where do we bind this constant buffer into + // a descriptor set for later use? + // + auto slangLayout = getLayout()->getElementTypeLayout(); + size_t uniformSize = slangLayout->getSize(); + if (uniformSize) + { + m_data.setCount((Index)uniformSize); + } + + // If the layout specifies that we have any resources or sub-objects, + // then we need to size the appropriate arrays to account for them. + // + // Note: the counts here are the *total* number of resources/sub-objects + // and not just the number of resource/sub-object ranges. + // + resources.setCount(typeLayout->getResourceCount()); + m_objects.setCount(typeLayout->getSubObjectCount()); + + for (auto subObjectRange : getLayout()->subObjectRanges) + { + RefPtr subObjectLayout = subObjectRange.layout; + + // In the case where the sub-object range represents an + // existential-type leaf field (e.g., an `IBar`), we + // cannot pre-allocate the object(s) to go into that + // range, since we can't possibly know what to allocate + // at this point. + // + if (!subObjectLayout) + continue; + // + // Otherwise, we will allocate a sub-object to fill + // in each entry in this range, based on the layout + // information we already have. + + auto& bindingRangeInfo = getLayout()->m_bindingRanges[subObjectRange.bindingRangeIndex]; + for (Index i = 0; i < bindingRangeInfo.count; ++i) + { + RefPtr subObject = new ShaderObjectImpl(); + SLANG_RETURN_ON_FAIL(subObject->init(device, subObjectLayout)); + + ShaderOffset offset; + offset.uniformOffset = bindingRangeInfo.uniformOffset + sizeof(void*) * i; + offset.bindingRangeIndex = (GfxIndex)subObjectRange.bindingRangeIndex; + offset.bindingArrayIndex = (GfxIndex)i; + + SLANG_RETURN_ON_FAIL(setObject(offset, subObject)); + } + } + return SLANG_OK; +} + +SLANG_NO_THROW GfxCount SLANG_MCALL ShaderObjectImpl::getEntryPointCount() +{ + return 0; +} + +SLANG_NO_THROW Result SLANG_MCALL + ShaderObjectImpl::getEntryPoint(GfxIndex index, IShaderObject** outEntryPoint) +{ + *outEntryPoint = nullptr; + return SLANG_OK; +} + +SLANG_NO_THROW const void* SLANG_MCALL ShaderObjectImpl::getRawData() +{ + return m_data.getBuffer(); +} + +SLANG_NO_THROW Size SLANG_MCALL ShaderObjectImpl::getSize() +{ + return (Size)m_data.getCount(); +} + +SLANG_NO_THROW Result SLANG_MCALL + ShaderObjectImpl::setData(ShaderOffset const& offset, void const* data, Size size) +{ + Size temp = m_data.getCount() - (Size)offset.uniformOffset; + size = Math::Min(size, temp); + SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy( + (uint8_t*)m_data.getBuffer() + offset.uniformOffset, data, size, cudaMemcpyDefault)); + return SLANG_OK; +} + +SLANG_NO_THROW Result SLANG_MCALL + ShaderObjectImpl::setResource(ShaderOffset const& offset, IResourceView* resourceView) +{ + if (!resourceView) + return SLANG_OK; + + auto layout = getLayout(); + + auto bindingRangeIndex = offset.bindingRangeIndex; + SLANG_ASSERT(bindingRangeIndex >= 0); + SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount()); + + auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex]; + + auto viewIndex = bindingRange.baseIndex + offset.bindingArrayIndex; + auto cudaView = static_cast(resourceView); + + resources[viewIndex] = cudaView; + + if (cudaView->textureResource) + { + if (cudaView->m_desc.type == IResourceView::Type::UnorderedAccess) + { + auto handle = cudaView->textureResource->m_cudaSurfObj; + setData(offset, &handle, sizeof(uint64_t)); + } + else + { + auto handle = cudaView->textureResource->getBindlessHandle(); + setData(offset, &handle, sizeof(uint64_t)); + } + } + else if (cudaView->memoryResource) + { + auto handle = cudaView->memoryResource->getBindlessHandle(); + setData(offset, &handle, sizeof(handle)); + auto sizeOffset = offset; + sizeOffset.uniformOffset += sizeof(handle); + auto& desc = *cudaView->memoryResource->getDesc(); + size_t size = desc.sizeInBytes; + if (desc.elementSize > 1) + size /= desc.elementSize; + setData(sizeOffset, &size, sizeof(size)); + } + else if (cudaView->proxyBuffer) + { + auto handle = cudaView->proxyBuffer; + setData(offset, &handle, sizeof(handle)); + auto sizeOffset = offset; + sizeOffset.uniformOffset += sizeof(handle); + auto& desc = *cudaView->memoryResource->getDesc(); + size_t size = desc.sizeInBytes; + if (desc.elementSize > 1) + size /= desc.elementSize; + setData(sizeOffset, &size, sizeof(size)); + } + return SLANG_OK; +} + +SLANG_NO_THROW Result SLANG_MCALL + ShaderObjectImpl::setObject(ShaderOffset const& offset, IShaderObject* object) +{ + SLANG_RETURN_ON_FAIL(Super::setObject(offset, object)); + + auto bindingRangeIndex = offset.bindingRangeIndex; + auto& bindingRange = getLayout()->m_bindingRanges[bindingRangeIndex]; + + ShaderObjectImpl* subObject = static_cast(object); + switch (bindingRange.bindingType) + { + default: + { + void* subObjectDataBuffer = subObject->getBuffer(); + SLANG_RETURN_ON_FAIL(setData(offset, &subObjectDataBuffer, sizeof(void*))); + } + break; + case slang::BindingType::ExistentialValue: + case slang::BindingType::RawBuffer: + case slang::BindingType::MutableRawBuffer: + break; + } + return SLANG_OK; +} + +SLANG_NO_THROW Result SLANG_MCALL + ShaderObjectImpl::setSampler(ShaderOffset const& offset, ISamplerState* sampler) +{ + SLANG_UNUSED(sampler); + SLANG_UNUSED(offset); + return SLANG_OK; +} + +SLANG_NO_THROW Result SLANG_MCALL ShaderObjectImpl::setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) +{ + SLANG_UNUSED(sampler); + setResource(offset, textureView); + return SLANG_OK; +} + +EntryPointShaderObjectImpl::EntryPointShaderObjectImpl() +{ + m_data.isHostOnly = true; +} + +SLANG_NO_THROW uint32_t SLANG_MCALL RootShaderObjectImpl::addRef() +{ + return 1; +} + +SLANG_NO_THROW uint32_t SLANG_MCALL RootShaderObjectImpl::release() +{ + return 1; +} + +SlangResult RootShaderObjectImpl::init(IDevice* device, ShaderObjectLayoutImpl* typeLayout) +{ + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::init(device, typeLayout)); + auto programLayout = dynamic_cast(typeLayout); + for (auto& entryPoint : programLayout->entryPointLayouts) + { + RefPtr object = new EntryPointShaderObjectImpl(); + SLANG_RETURN_ON_FAIL(object->init(device, entryPoint)); + entryPointObjects.add(object); + } + return SLANG_OK; +} + +SLANG_NO_THROW GfxCount SLANG_MCALL RootShaderObjectImpl::getEntryPointCount() +{ + return (GfxCount)entryPointObjects.getCount(); +} + +SLANG_NO_THROW Result SLANG_MCALL + RootShaderObjectImpl::getEntryPoint(GfxIndex index, IShaderObject** outEntryPoint) +{ + returnComPtr(outEntryPoint, entryPointObjects[index]); + return SLANG_OK; +} + +Result RootShaderObjectImpl::collectSpecializationArgs(ExtendedShaderObjectTypeList& args) +{ + SLANG_RETURN_ON_FAIL(ShaderObjectImpl::collectSpecializationArgs(args)); + for (auto& entryPoint : entryPointObjects) + { + SLANG_RETURN_ON_FAIL(entryPoint->collectSpecializationArgs(args)); + } + return SLANG_OK; +} + +} // namespace cuda +#endif +} // namespace gfx -- cgit v1.2.3