summaryrefslogtreecommitdiffstats
path: root/tools/gfx/cuda/render-cuda.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/gfx/cuda/render-cuda.cpp')
-rw-r--r--tools/gfx/cuda/render-cuda.cpp498
1 files changed, 164 insertions, 334 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index b919ac6b0..f60be8eda 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -70,7 +70,8 @@ struct CUDAErrorInfo
builder << m_errorString;
}
- StdWriters::getError().put(builder.getUnownedSlice());
+ getDebugCallback()->handleMessage(DebugMessageType::Error, DebugMessageSource::Driver,
+ builder.getUnownedSlice().begin());
// Slang::signalUnexpectedError(builder.getBuffer());
return SLANG_FAIL;
@@ -82,7 +83,6 @@ struct CUDAErrorInfo
const char* m_errorString;
};
-# if 1
// If this code path is enabled, CUDA errors will be reported directly to StdWriter::out stream.
static SlangResult _handleCUDAError(CUresult cuResult, const char* file, int line)
@@ -98,27 +98,7 @@ static SlangResult _handleCUDAError(cudaError_t error, const char* file, int lin
return CUDAErrorInfo(file, line, cudaGetErrorName(error), cudaGetErrorString(error)).handle();
}
-# define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__)
-
-# else
-// If this code path is enabled, errors are not reported, but can have an assert enabled
-
-static SlangResult _handleCUDAError(CUresult cuResult)
-{
- SLANG_UNUSED(cuResult);
- // SLANG_ASSERT(!"Failed CUDA call");
- return SLANG_FAIL;
-}
-
-static SlangResult _handleCUDAError(cudaError_t error)
-{
- SLANG_UNUSED(error);
- // SLANG_ASSERT(!"Failed CUDA call");
- return SLANG_FAIL;
-}
-
-# define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res)
-# endif
+# define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__)
# define SLANG_CUDA_RETURN_ON_FAIL(x) \
{ \
@@ -251,20 +231,13 @@ public:
RefPtr<CUDAContext> m_cudaContext;
};
-class CUDAResourceView : public IResourceView, public ComObject
+class CUDAResourceView : public ResourceViewBase
{
public:
- SLANG_COM_OBJECT_IUNKNOWN_ALL
- IResourceView* getInterface(const Guid& guid)
- {
- if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IResourceView)
- return static_cast<IResourceView*>(this);
- return nullptr;
- }
-public:
Desc desc;
RefPtr<MemoryCUDAResource> memoryResource = nullptr;
RefPtr<TextureCUDAResource> textureResource = nullptr;
+ void* proxyBuffer = nullptr;
};
class CUDAShaderObjectLayout : public ShaderObjectLayoutBase
@@ -275,6 +248,7 @@ public:
slang::BindingType bindingType;
Index count;
Index baseIndex; // Flat index for sub-ojects
+ Index subObjectIndex;
// TODO: The `uniformOffset` field should be removed,
// since it cannot be supported by the Slang reflection
@@ -306,12 +280,9 @@ public:
CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
{
- initBase(renderer, layout);
-
- Index subObjectCount = 0;
- Index resourceCount = 0;
+ m_elementTypeLayout = _unwrapParameterGroups(layout, m_containerType);
- m_elementTypeLayout = _unwrapParameterGroups(layout);
+ initBase(renderer, m_elementTypeLayout);
// Compute the binding ranges that are used to store
// the logical contents of the object in memory. These will relate
@@ -342,18 +313,31 @@ public:
descriptorSetIndex, rangeIndexInDescriptorSet);
Index baseIndex = 0;
+ Index subObjectIndex = 0;
switch (slangBindingType)
{
case slang::BindingType::ConstantBuffer:
case slang::BindingType::ParameterBlock:
case slang::BindingType::ExistentialValue:
- baseIndex = subObjectCount;
- subObjectCount += count;
+ baseIndex = m_subObjectCount;
+ subObjectIndex = baseIndex;
+ m_subObjectCount += count;
+ break;
+ case slang::BindingType::RawBuffer:
+ case slang::BindingType::MutableRawBuffer:
+ if (slangLeafTypeLayout->getType()->getElementType() != nullptr)
+ {
+ // A structured buffer occupies both a resource slot and
+ // a sub-object slot.
+ subObjectIndex = m_subObjectCount;
+ m_subObjectCount += count;
+ }
+ baseIndex = m_resourceCount;
+ m_resourceCount += count;
break;
-
default:
- baseIndex = resourceCount;
- resourceCount += count;
+ baseIndex = m_resourceCount;
+ m_resourceCount += count;
break;
}
@@ -362,12 +346,10 @@ public:
bindingRangeInfo.count = count;
bindingRangeInfo.baseIndex = baseIndex;
bindingRangeInfo.uniformOffset = uniformOffset;
+ bindingRangeInfo.subObjectIndex = subObjectIndex;
m_bindingRanges.add(bindingRangeInfo);
}
- m_subObjectCount = subObjectCount;
- m_resourceCount = resourceCount;
-
SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
for (SlangInt r = 0; r < subObjectRangeCount; ++r)
{
@@ -399,6 +381,9 @@ public:
Index getResourceCount() const { return m_resourceCount; }
Index getSubObjectCount() const { return m_subObjectCount; }
+ List<SubObjectRangeInfo>& getSubObjectRanges() { return subObjectRanges; }
+ BindingRangeInfo getBindingRange(Index index) { return m_bindingRanges[index]; }
+ Index getBindingRangeCount() const { return m_bindingRanges.getCount(); }
};
class CUDAProgramLayout : public CUDAShaderObjectLayout
@@ -439,50 +424,108 @@ public:
}
};
-class CUDAShaderObject : public ShaderObjectBase
+class CUDAShaderObjectData
{
public:
- RefPtr<MemoryCUDAResource> bufferResource;
- List<RefPtr<CUDAShaderObject>> objects;
- List<RefPtr<CUDAResourceView>> resources;
-
- virtual SLANG_NO_THROW Result SLANG_MCALL
- init(IDevice* device, CUDAShaderObjectLayout* typeLayout);
-
- CUDAShaderObjectLayout* getLayout()
+ bool isHostOnly = false;
+ Slang::RefPtr<MemoryCUDAResource> m_bufferResource;
+ Slang::RefPtr<CUDAResourceView> m_bufferView;
+ Slang::List<uint8_t> m_cpuBuffer;
+ void setCount(Index count)
{
- return static_cast<CUDAShaderObjectLayout*>(m_layout.Ptr());
- }
+ if (isHostOnly)
+ {
+ m_cpuBuffer.setCount(count);
+ if (!m_bufferView)
+ {
+ IResourceView::Desc viewDesc = {};
+ viewDesc.type = IResourceView::Type::UnorderedAccess;
+ m_bufferView = new CUDAResourceView();
+ m_bufferView->proxyBuffer = m_cpuBuffer.getBuffer();
+ m_bufferView->desc = viewDesc;
+ }
+ return;
+ }
- virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize)
- {
- BufferResource::Desc bufferDesc;
- bufferDesc.type = IResource::Type::Buffer;
- bufferDesc.defaultState = ResourceState::ConstantBuffer;
- bufferDesc.allowedStates =
- ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
- bufferDesc.sizeInBytes = bufferSize;
- bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write;
- ComPtr<IBufferResource> constantBuffer;
- SLANG_RETURN_ON_FAIL(device->createBufferResource(bufferDesc, nullptr, constantBuffer.writeRef()));
- bufferResource = static_cast<MemoryCUDAResource*>(constantBuffer.get());
- return SLANG_OK;
+ if (!m_bufferResource)
+ {
+ IBufferResource::Desc desc;
+ desc.type = IResource::Type::Buffer;
+ desc.sizeInBytes = count;
+ m_bufferResource = new MemoryCUDAResource(desc);
+ if (count)
+ cudaMalloc(&m_bufferResource->m_cudaMemory, (size_t)count);
+ IResourceView::Desc viewDesc = {};
+ viewDesc.type = IResourceView::Type::UnorderedAccess;
+ m_bufferView = new CUDAResourceView();
+ m_bufferView->memoryResource = m_bufferResource;
+ m_bufferView->desc = viewDesc;
+ }
+ auto oldSize = m_bufferResource->getDesc()->sizeInBytes;
+ if ((size_t)count != oldSize)
+ {
+ void* newMemory = nullptr;
+ if (count)
+ {
+ cudaMalloc(&newMemory, (size_t)count);
+ }
+ if (oldSize)
+ {
+ cudaMemcpy(
+ newMemory,
+ m_bufferResource->m_cudaMemory,
+ Math::Min((size_t)count, oldSize),
+ cudaMemcpyDefault);
+ }
+ cudaFree(m_bufferResource->m_cudaMemory);
+ m_bufferResource->m_cudaMemory = newMemory;
+ m_bufferResource->getDesc()->sizeInBytes = count;
+ }
}
- virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer()
+ Slang::Index getCount()
{
- return bufferResource ? bufferResource->m_cudaMemory : nullptr;
+ if (isHostOnly)
+ return m_cpuBuffer.getCount();
+ if (m_bufferResource)
+ return (Slang::Index)(m_bufferResource->getDesc()->sizeInBytes);
+ else
+ return 0;
}
- virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize()
+ void* getBuffer()
{
- return bufferResource ? bufferResource->getDesc()->sizeInBytes : 0;
+ if (isHostOnly)
+ return m_cpuBuffer.getBuffer();
+
+ if (m_bufferResource)
+ return m_bufferResource->m_cudaMemory;
+ return nullptr;
}
- virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() override
+ /// Returns a resource view for GPU access into the buffer content.
+ ResourceViewBase* getResourceView(
+ RendererBase* device,
+ slang::TypeLayoutReflection* elementLayout,
+ slang::BindingType bindingType)
{
- return getLayout()->getElementTypeLayout();
+ SLANG_UNUSED(device);
+ m_bufferResource->getDesc()->elementSize = (int)elementLayout->getSize();
+ return m_bufferView.Ptr();
}
+};
+
+class CUDAShaderObject
+ : public ShaderObjectBaseImpl<CUDAShaderObject, CUDAShaderObjectLayout, CUDAShaderObjectData>
+{
+ typedef ShaderObjectBaseImpl<CUDAShaderObject, CUDAShaderObjectLayout, CUDAShaderObjectData>
+ Super;
+
+public:
+ List<RefPtr<CUDAResourceView>> resources;
+
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ init(IDevice* device, CUDAShaderObjectLayout* typeLayout);
virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return 0; }
virtual SLANG_NO_THROW Result SLANG_MCALL
@@ -494,164 +537,17 @@ public:
virtual SLANG_NO_THROW Result SLANG_MCALL
setData(ShaderOffset const& offset, void const* data, size_t size) override
{
- size = Math::Min(size, bufferResource->getDesc()->sizeInBytes - offset.uniformOffset);
+ size = Math::Min(size, (size_t)m_data.getCount() - offset.uniformOffset);
SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
- (uint8_t*)bufferResource->m_cudaMemory + offset.uniformOffset,
- data,
- size,
- cudaMemcpyHostToDevice));
+ (uint8_t*)m_data.getBuffer() + offset.uniformOffset, data, size, cudaMemcpyDefault));
return SLANG_OK;
}
virtual SLANG_NO_THROW Result SLANG_MCALL
- setDeviceData(size_t offset, void* data, size_t size)
- {
- size = Math::Min(size, bufferResource->getDesc()->sizeInBytes - offset);
- SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
- (uint8_t*)bufferResource->m_cudaMemory + offset,
- data,
- size,
- cudaMemcpyHostToDevice));
- return SLANG_OK;
- }
- virtual SLANG_NO_THROW Result SLANG_MCALL
- getObject(ShaderOffset const& offset, IShaderObject** object) override
+ setResource(ShaderOffset const& offset, IResourceView* resourceView) override
{
- auto subObjectIndex =
- getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex;
-
- SLANG_ASSERT(subObjectIndex < objects.getCount());
- if(subObjectIndex >= objects.getCount())
- return SLANG_E_INVALID_ARG;
-
- if (subObjectIndex >= objects.getCount())
- {
- *object = nullptr;
+ if (!resourceView)
return SLANG_OK;
- }
- returnComPtr(object, objects[subObjectIndex]);
- return SLANG_OK;
- }
- virtual SLANG_NO_THROW Result SLANG_MCALL
- setObject(ShaderOffset const& offset, IShaderObject* object) override
- {
- auto layout = getLayout();
-
- auto bindingRangeIndex = offset.bindingRangeIndex;
- SLANG_ASSERT(bindingRangeIndex >= 0);
- SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
-
- auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
-
- auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
- auto subObject = dynamic_cast<CUDAShaderObject*>(object);
-
- // TODO: We should really not need to retain the objects here
- objects[subObjectIndex] = subObject;
-
- switch( bindingRange.bindingType )
- {
- default:
- SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
- break;
-
- // If the range being assigned into represents an interface/existential-type leaf field,
- // then we need to consider how the `object` being assigned here affects specialization.
- // We may also need to assign some data from the sub-object into the ordinary data
- // buffer for the parent object.
- //
- case slang::BindingType::ExistentialValue:
- {
- auto renderer = getRenderer();
-
- ComPtr<slang::ISession> slangSession;
- SLANG_RETURN_ON_FAIL(renderer->getSlangSession(slangSession.writeRef()));
-
- // A leaf field of interface type is laid out inside of the parent object
- // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields
- // is a contract between the compiler and any runtime system, so we will
- // need to rely on details of the binary layout.
-
- // We start by querying the layout/type of the concrete value that the application
- // is trying to store into the field, and also the layout/type of the leaf
- // existential-type field itself.
- //
- auto concreteTypeLayout = subObject->getElementTypeLayout();
- auto concreteType = concreteTypeLayout->getType();
- //
- auto existentialTypeLayout = layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout(bindingRangeIndex);
- auto existentialType = existentialTypeLayout->getType();
-
- // The first field of the tuple (offset zero) is the run-time type information (RTTI)
- // ID for the concrete type being stored into the field.
- //
- // TODO: We need to be able to gather the RTTI type ID from `object` and then
- // use `setData(offset, &TypeID, sizeof(TypeID))`.
-
- // The second field of the tuple (offset 8) is the ID of the "witness" for the
- // conformance of the concrete type to the interface used by this field.
- //
- auto witnessTableOffset = offset;
- witnessTableOffset.uniformOffset += 8;
- //
- // Conformances of a type to an interface are computed and then stored by the
- // Slang runtime, so we can look up the ID for this particular conformance (which
- // will create it on demand).
- //
- // Note: If the type doesn't actually conform to the required interface for
- // this sub-object range, then this is the point where we will detect that
- // fact and error out.
- //
- uint32_t conformanceID = 0xFFFFFFFF;
- SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID(
- concreteType, existentialType, &conformanceID));
- //
- // Once we have the conformance ID, then we can write it into the object
- // at the required offset.
- //
- SLANG_RETURN_ON_FAIL(setData(witnessTableOffset, &conformanceID, sizeof(conformanceID)));
-
- // The third field of the tuple (offset 16) is the "payload" that is supposed to
- // hold the data for a value of the given concrete type.
- //
- auto payloadOffset = offset;
- payloadOffset.uniformOffset += 16;
-
- // There are two cases we need to consider here for how the payload might be used:
- //
- // * If the concrete type of the value being bound is one that can "fit" into the
- // available payload space, then it should be stored in the payload.
- //
- // * If the concrete type of the value cannot fit in the payload space, then it
- // will need to be stored somewhere else.
- //
- if(_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout))
- {
- // If the value can fit in the payload area, then we will go ahead and copy
- // its bytes into that area.
- //
- auto valueSize = concreteTypeLayout->getSize();
- SLANG_RETURN_ON_FAIL(setDeviceData(payloadOffset.uniformOffset, subObject->getBuffer(), valueSize));
- }
- else
- {
- // If the value cannot fit in the payload area, then we will pass a pointer
- // to the sub-object instead.
- //
- // Note: The Slang compiler does not currently emit code that handles the
- // pointer case, but that is the expected implementation for values
- // that do not fit into the fixed-size payload.
- //
- SLANG_RETURN_ON_FAIL(setData(payloadOffset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
- }
- }
- break;
- }
- return SLANG_OK;
- }
- virtual SLANG_NO_THROW Result SLANG_MCALL
- setResource(ShaderOffset const& offset, IResourceView* resourceView) override
- {
auto layout = getLayout();
auto bindingRangeIndex = offset.bindingRangeIndex;
@@ -678,7 +574,7 @@ public:
setData(offset, &handle, sizeof(uint64_t));
}
}
- else
+ else if (cudaView->memoryResource)
{
auto handle = cudaView->memoryResource->getBindlessHandle();
setData(offset, &handle, sizeof(handle));
@@ -689,7 +585,42 @@ public:
if (desc.elementSize > 1)
size /= desc.elementSize;
setData(sizeOffset, &size, sizeof(size));
+ }
+ else if (cudaView->proxyBuffer)
+ {
+ auto handle = cudaView->proxyBuffer;
+ setData(offset, &handle, sizeof(handle));
+ auto sizeOffset = offset;
+ sizeOffset.uniformOffset += sizeof(handle);
+ auto& desc = *cudaView->memoryResource->getDesc();
+ size_t size = desc.sizeInBytes;
+ if (desc.elementSize > 1)
+ size /= desc.elementSize;
+ setData(sizeOffset, &size, sizeof(size));
+ }
+ return SLANG_OK;
+ }
+ virtual SLANG_NO_THROW Result SLANG_MCALL
+ setObject(ShaderOffset const& offset, IShaderObject* object) override
+ {
+ SLANG_RETURN_ON_FAIL(Super::setObject(offset, object));
+ auto bindingRangeIndex = offset.bindingRangeIndex;
+ auto& bindingRange = getLayout()->m_bindingRanges[bindingRangeIndex];
+
+ CUDAShaderObject* subObject = static_cast<CUDAShaderObject*>(object);
+ switch (bindingRange.bindingType)
+ {
+ default:
+ {
+ void* subObjectDataBuffer = subObject->getBuffer();
+ SLANG_RETURN_ON_FAIL(setData(offset, &subObjectDataBuffer, sizeof(void*)));
+ }
+ break;
+ case slang::BindingType::ExistentialValue:
+ case slang::BindingType::RawBuffer:
+ case slang::BindingType::MutableRawBuffer:
+ break;
}
return SLANG_OK;
}
@@ -707,107 +638,12 @@ public:
setResource(offset, textureView);
return SLANG_OK;
}
-
- // Appends all types that are used to specialize the element type of this shader object in `args` list.
- virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
- {
- // TODO: the logic here is a copy-paste of `GraphicsCommonShaderObject::collectSpecializationArgs`,
- // consider moving the implementation to `ShaderObjectBase` and share the logic among different implementations.
-
- auto& subObjectRanges = getLayout()->subObjectRanges;
- // The following logic is built on the assumption that all fields that involve existential types (and
- // therefore require specialization) will results in a sub-object range in the type layout.
- // This allows us to simply scan the sub-object ranges to find out all specialization arguments.
- for (Index subObjIndex = 0; subObjIndex < subObjectRanges.getCount(); subObjIndex++)
- {
- // Retrieve the corresponding binding range of the sub object.
- auto bindingRange = getLayout()->m_bindingRanges[subObjectRanges[subObjIndex].bindingRangeIndex];
- switch (bindingRange.bindingType)
- {
- case slang::BindingType::ExistentialValue:
- {
- // A binding type of `ExistentialValue` means the sub-object represents a interface-typed field.
- // In this case the specialization argument for this field is the actual specialized type of the bound
- // shader object. If the shader object's type is an ordinary type without existential fields, then the
- // type argument will simply be the ordinary type. But if the sub object's type is itself a specialized
- // type, we need to make sure to use that type as the specialization argument.
-
- // TODO: need to implement the case where the field is an array of existential values.
- ExtendedShaderObjectType specializedSubObjType;
- SLANG_RETURN_ON_FAIL(objects[subObjIndex]->getSpecializedShaderObjectType(&specializedSubObjType));
- args.add(specializedSubObjType);
- break;
- }
- case slang::BindingType::ParameterBlock:
- case slang::BindingType::ConstantBuffer:
- // Currently we only handle the case where the field's type is
- // `ParameterBlock<SomeStruct>` or `ConstantBuffer<SomeStruct>`, where `SomeStruct` is a struct type
- // (not directly an interface type). In this case, we just recursively collect the specialization arguments
- // from the bound sub object.
- SLANG_RETURN_ON_FAIL(objects[subObjIndex]->collectSpecializationArgs(args));
- // TODO: we need to handle the case where the field is of the form `ParameterBlock<IFoo>`. We should treat
- // this case the same way as the `ExistentialValue` case here, but currently we lack a mechanism to distinguish
- // the two scenarios.
- break;
- }
- // TODO: need to handle another case where specialization happens on resources fields e.g. `StructuredBuffer<IFoo>`.
- }
- return SLANG_OK;
- }
};
class CUDAEntryPointShaderObject : public CUDAShaderObject
{
public:
- void* hostBuffer = nullptr;
- size_t uniformBufferSize = 0;
- // Override buffer allocation so we store all uniform data on host memory instead of device memory.
- virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize) override
- {
- SLANG_UNUSED(device);
- uniformBufferSize = bufferSize;
- hostBuffer = malloc(bufferSize);
- return SLANG_OK;
- }
-
- virtual SLANG_NO_THROW Result SLANG_MCALL
- setData(ShaderOffset const& offset, void const* data, size_t size) override
- {
- size = Math::Min(size, uniformBufferSize - offset.uniformOffset);
- memcpy(
- (uint8_t*)hostBuffer + offset.uniformOffset,
- data,
- size);
- return SLANG_OK;
- }
-
- virtual SLANG_NO_THROW Result SLANG_MCALL
- setDeviceData(size_t offset, void* data, size_t size) override
- {
- size = Math::Min(size, uniformBufferSize - offset);
- SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
- (uint8_t*)hostBuffer + offset,
- data,
- size,
- cudaMemcpyDeviceToHost));
- return SLANG_OK;
- }
-
-
- virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer() override
- {
- return hostBuffer;
- }
-
- virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize() override
- {
- return uniformBufferSize;
- }
-
- ~CUDAEntryPointShaderObject()
- {
- free(hostBuffer);
- }
+ CUDAEntryPointShaderObject() { m_data.isHostOnly = true; }
};
class CUDARootShaderObject : public CUDAShaderObject
@@ -1233,15 +1069,12 @@ public:
currentPipeline->shaderProgram->cudaModule,
"SLANG_globalParams");
- CUdeviceptr globalParamsCUDAData =
- currentRootObject->bufferResource
- ? (CUdeviceptr)currentRootObject->bufferResource->getBindlessHandle()
- : 0;
+ CUdeviceptr globalParamsCUDAData = (CUdeviceptr)currentRootObject->getBuffer();
cudaMemcpyAsync(
(void*)globalParamsSymbol,
(void*)globalParamsCUDAData,
globalParamsSymbolSize,
- cudaMemcpyDeviceToDevice,
+ cudaMemcpyDefault,
0);
}
//
@@ -1818,7 +1651,7 @@ public:
SLANG_CUDA_RETURN_ON_FAIL(cudaMallocManaged(&resource->m_cudaMemory, desc.sizeInBytes));
if (initData)
{
- SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(resource->m_cudaMemory, initData, desc.sizeInBytes, cudaMemcpyHostToDevice));
+ SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(resource->m_cudaMemory, initData, desc.sizeInBytes, cudaMemcpyDefault));
}
returnComPtr(outResource, resource);
return SLANG_OK;
@@ -2080,7 +1913,7 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
size_t uniformSize = slangLayout->getSize();
if (uniformSize)
{
- initBuffer(device, uniformSize);
+ m_data.setCount((Index)uniformSize);
}
// If the layout specifies that we have any resources or sub-objects,
@@ -2090,10 +1923,7 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
// and not just the number of resource/sub-object ranges.
//
resources.setCount(typeLayout->getResourceCount());
- objects.setCount(typeLayout->getSubObjectCount());
-
- Index subObjectCount = slangLayout->getSubObjectRangeCount();
- objects.setCount(subObjectCount);
+ m_objects.setCount(typeLayout->getSubObjectCount());
for (auto subObjectRange : getLayout()->subObjectRanges)
{