1 files changed, 164 insertions, 334 deletions
diff --git a/tools/gfx/cuda/render-cuda.cpp b/tools/gfx/cuda/render-cuda.cpp
index b919ac6b0..f60be8eda 100644
--- a/tools/gfx/cuda/render-cuda.cpp
+++ b/tools/gfx/cuda/render-cuda.cpp
@@ -70,7 +70,8 @@ struct CUDAErrorInfo
             builder << m_errorString;
         }
 
-        StdWriters::getError().put(builder.getUnownedSlice());
+        getDebugCallback()->handleMessage(DebugMessageType::Error, DebugMessageSource::Driver,
+            builder.getUnownedSlice().begin());
 
         // Slang::signalUnexpectedError(builder.getBuffer());
         return SLANG_FAIL;
@@ -82,7 +83,6 @@ struct CUDAErrorInfo
     const char* m_errorString;
 };
 
-#    if 1
 // If this code path is enabled, CUDA errors will be reported directly to StdWriter::out stream.
 
 static SlangResult _handleCUDAError(CUresult cuResult, const char* file, int line)
@@ -98,27 +98,7 @@ static SlangResult _handleCUDAError(cudaError_t error, const char* file, int lin
     return CUDAErrorInfo(file, line, cudaGetErrorName(error), cudaGetErrorString(error)).handle();
 }
 
-#        define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__)
-
-#    else
-// If this code path is enabled, errors are not reported, but can have an assert enabled
-
-static SlangResult _handleCUDAError(CUresult cuResult)
-{
-    SLANG_UNUSED(cuResult);
-    // SLANG_ASSERT(!"Failed CUDA call");
-    return SLANG_FAIL;
-}
-
-static SlangResult _handleCUDAError(cudaError_t error)
-{
-    SLANG_UNUSED(error);
-    // SLANG_ASSERT(!"Failed CUDA call");
-    return SLANG_FAIL;
-}
-
-#        define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res)
-#    endif
+#    define SLANG_CUDA_HANDLE_ERROR(x) _handleCUDAError(_res, __FILE__, __LINE__)
 
 #    define SLANG_CUDA_RETURN_ON_FAIL(x)              \
         {                                             \
@@ -251,20 +231,13 @@ public:
     RefPtr<CUDAContext> m_cudaContext;
 };
 
-class CUDAResourceView : public IResourceView, public ComObject
+class CUDAResourceView : public ResourceViewBase
 {
 public:
-    SLANG_COM_OBJECT_IUNKNOWN_ALL
-    IResourceView* getInterface(const Guid& guid)
-    {
-        if (guid == GfxGUID::IID_ISlangUnknown || guid == GfxGUID::IID_IResourceView)
-            return static_cast<IResourceView*>(this);
-        return nullptr;
-    }
-public:
     Desc desc;
     RefPtr<MemoryCUDAResource> memoryResource = nullptr;
     RefPtr<TextureCUDAResource> textureResource = nullptr;
+    void* proxyBuffer = nullptr;
 };
 
 class CUDAShaderObjectLayout : public ShaderObjectLayoutBase
@@ -275,6 +248,7 @@ public:
         slang::BindingType bindingType;
         Index count;
         Index baseIndex; // Flat index for sub-ojects
+        Index subObjectIndex;
 
         // TODO: The `uniformOffset` field should be removed,
         // since it cannot be supported by the Slang reflection
@@ -306,12 +280,9 @@ public:
 
     CUDAShaderObjectLayout(RendererBase* renderer, slang::TypeLayoutReflection* layout)
     {
-        initBase(renderer, layout);
-
-        Index subObjectCount = 0;
-        Index resourceCount = 0;
+        m_elementTypeLayout = _unwrapParameterGroups(layout, m_containerType);
 
-        m_elementTypeLayout = _unwrapParameterGroups(layout);
+        initBase(renderer, m_elementTypeLayout);
 
         // Compute the binding ranges that are used to store
         // the logical contents of the object in memory. These will relate
@@ -342,18 +313,31 @@ public:
                 descriptorSetIndex, rangeIndexInDescriptorSet);
 
             Index baseIndex = 0;
+            Index subObjectIndex = 0;
             switch (slangBindingType)
             {
             case slang::BindingType::ConstantBuffer:
             case slang::BindingType::ParameterBlock:
             case slang::BindingType::ExistentialValue:
-                baseIndex = subObjectCount;
-                subObjectCount += count;
+                baseIndex = m_subObjectCount;
+                subObjectIndex = baseIndex;
+                m_subObjectCount += count;
+                break;
+            case slang::BindingType::RawBuffer:
+            case slang::BindingType::MutableRawBuffer:
+                if (slangLeafTypeLayout->getType()->getElementType() != nullptr)
+                {
+                    // A structured buffer occupies both a resource slot and
+                    // a sub-object slot.
+                    subObjectIndex = m_subObjectCount;
+                    m_subObjectCount += count;
+                }
+                baseIndex = m_resourceCount;
+                m_resourceCount += count;
                 break;
-
             default:
-                baseIndex = resourceCount;
-                resourceCount += count;
+                baseIndex = m_resourceCount;
+                m_resourceCount += count;
                 break;
             }
 
@@ -362,12 +346,10 @@ public:
             bindingRangeInfo.count = count;
             bindingRangeInfo.baseIndex = baseIndex;
             bindingRangeInfo.uniformOffset = uniformOffset;
+            bindingRangeInfo.subObjectIndex = subObjectIndex;
             m_bindingRanges.add(bindingRangeInfo);
         }
 
-        m_subObjectCount = subObjectCount;
-        m_resourceCount = resourceCount;
-
         SlangInt subObjectRangeCount = m_elementTypeLayout->getSubObjectRangeCount();
         for (SlangInt r = 0; r < subObjectRangeCount; ++r)
         {
@@ -399,6 +381,9 @@ public:
 
     Index getResourceCount() const { return m_resourceCount; }
     Index getSubObjectCount() const { return m_subObjectCount; }
+    List<SubObjectRangeInfo>& getSubObjectRanges() { return subObjectRanges; }
+    BindingRangeInfo getBindingRange(Index index) { return m_bindingRanges[index]; }
+    Index getBindingRangeCount() const { return m_bindingRanges.getCount(); }
 };
 
 class CUDAProgramLayout : public CUDAShaderObjectLayout
@@ -439,50 +424,108 @@ public:
     }
 };
 
-class CUDAShaderObject : public ShaderObjectBase
+class CUDAShaderObjectData
 {
 public:
-    RefPtr<MemoryCUDAResource> bufferResource;
-    List<RefPtr<CUDAShaderObject>> objects;
-    List<RefPtr<CUDAResourceView>> resources;
-
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        init(IDevice* device, CUDAShaderObjectLayout* typeLayout);
-
-    CUDAShaderObjectLayout* getLayout()
+    bool isHostOnly = false;
+    Slang::RefPtr<MemoryCUDAResource> m_bufferResource;
+    Slang::RefPtr<CUDAResourceView> m_bufferView;
+    Slang::List<uint8_t> m_cpuBuffer;
+    void setCount(Index count)
     {
-        return static_cast<CUDAShaderObjectLayout*>(m_layout.Ptr());
-    }
+        if (isHostOnly)
+        {
+            m_cpuBuffer.setCount(count);
+            if (!m_bufferView)
+            {
+                IResourceView::Desc viewDesc = {};
+                viewDesc.type = IResourceView::Type::UnorderedAccess;
+                m_bufferView = new CUDAResourceView();
+                m_bufferView->proxyBuffer = m_cpuBuffer.getBuffer();
+                m_bufferView->desc = viewDesc;
+            }
+            return;
+        }
 
-    virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize)
-    {
-        BufferResource::Desc bufferDesc;
-        bufferDesc.type = IResource::Type::Buffer;
-        bufferDesc.defaultState = ResourceState::ConstantBuffer;
-        bufferDesc.allowedStates =
-            ResourceStateSet(ResourceState::ConstantBuffer, ResourceState::CopyDestination);
-        bufferDesc.sizeInBytes = bufferSize;
-        bufferDesc.cpuAccessFlags |= IResource::AccessFlag::Write;
-        ComPtr<IBufferResource> constantBuffer;
-        SLANG_RETURN_ON_FAIL(device->createBufferResource(bufferDesc, nullptr, constantBuffer.writeRef()));
-        bufferResource = static_cast<MemoryCUDAResource*>(constantBuffer.get());
-        return SLANG_OK;
+        if (!m_bufferResource)
+        {
+            IBufferResource::Desc desc;
+            desc.type = IResource::Type::Buffer;
+            desc.sizeInBytes = count;
+            m_bufferResource = new MemoryCUDAResource(desc);
+            if (count)
+                cudaMalloc(&m_bufferResource->m_cudaMemory, (size_t)count);
+            IResourceView::Desc viewDesc = {};
+            viewDesc.type = IResourceView::Type::UnorderedAccess;
+            m_bufferView = new CUDAResourceView();
+            m_bufferView->memoryResource = m_bufferResource;
+            m_bufferView->desc = viewDesc;
+        }
+        auto oldSize = m_bufferResource->getDesc()->sizeInBytes;
+        if ((size_t)count != oldSize)
+        {
+            void* newMemory = nullptr;
+            if (count)
+            {
+                cudaMalloc(&newMemory, (size_t)count);
+            }
+            if (oldSize)
+            {
+                cudaMemcpy(
+                    newMemory,
+                    m_bufferResource->m_cudaMemory,
+                    Math::Min((size_t)count, oldSize),
+                    cudaMemcpyDefault);
+            }
+            cudaFree(m_bufferResource->m_cudaMemory);
+            m_bufferResource->m_cudaMemory = newMemory;
+            m_bufferResource->getDesc()->sizeInBytes = count;
+        }
     }
 
-    virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer()
+    Slang::Index getCount()
     {
-        return bufferResource ? bufferResource->m_cudaMemory : nullptr;
+        if (isHostOnly)
+            return m_cpuBuffer.getCount();
+        if (m_bufferResource)
+            return (Slang::Index)(m_bufferResource->getDesc()->sizeInBytes);
+        else
+            return 0;
     }
 
-    virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize()
+    void* getBuffer()
     {
-        return bufferResource ? bufferResource->getDesc()->sizeInBytes : 0;
+        if (isHostOnly)
+            return m_cpuBuffer.getBuffer();
+
+        if (m_bufferResource)
+            return m_bufferResource->m_cudaMemory;
+        return nullptr;
     }
 
-    virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() override
+    /// Returns a resource view for GPU access into the buffer content.
+    ResourceViewBase* getResourceView(
+        RendererBase* device,
+        slang::TypeLayoutReflection* elementLayout,
+        slang::BindingType bindingType)
     {
-        return getLayout()->getElementTypeLayout();
+        SLANG_UNUSED(device);
+        m_bufferResource->getDesc()->elementSize = (int)elementLayout->getSize();
+        return m_bufferView.Ptr();
     }
+};
+
+class CUDAShaderObject
+    : public ShaderObjectBaseImpl<CUDAShaderObject, CUDAShaderObjectLayout, CUDAShaderObjectData>
+{
+    typedef ShaderObjectBaseImpl<CUDAShaderObject, CUDAShaderObjectLayout, CUDAShaderObjectData>
+        Super;
+
+public:
+    List<RefPtr<CUDAResourceView>> resources;
+
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        init(IDevice* device, CUDAShaderObjectLayout* typeLayout);
 
     virtual SLANG_NO_THROW UInt SLANG_MCALL getEntryPointCount() override { return 0; }
     virtual SLANG_NO_THROW Result SLANG_MCALL
@@ -494,164 +537,17 @@ public:
     virtual SLANG_NO_THROW Result SLANG_MCALL
         setData(ShaderOffset const& offset, void const* data, size_t size) override
     {
-        size = Math::Min(size, bufferResource->getDesc()->sizeInBytes - offset.uniformOffset);
+        size = Math::Min(size, (size_t)m_data.getCount() - offset.uniformOffset);
         SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
-            (uint8_t*)bufferResource->m_cudaMemory + offset.uniformOffset,
-            data,
-            size,
-            cudaMemcpyHostToDevice));
+            (uint8_t*)m_data.getBuffer() + offset.uniformOffset, data, size, cudaMemcpyDefault));
         return SLANG_OK;
     }
     virtual SLANG_NO_THROW Result SLANG_MCALL
-        setDeviceData(size_t offset, void* data, size_t size)
-    {
-        size = Math::Min(size, bufferResource->getDesc()->sizeInBytes - offset);
-        SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
-            (uint8_t*)bufferResource->m_cudaMemory + offset,
-            data,
-            size,
-            cudaMemcpyHostToDevice));
-        return SLANG_OK;
-    }
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        getObject(ShaderOffset const& offset, IShaderObject** object) override
+        setResource(ShaderOffset const& offset, IResourceView* resourceView) override
     {
-        auto subObjectIndex =
-            getLayout()->m_bindingRanges[offset.bindingRangeIndex].baseIndex + offset.bindingArrayIndex;
-
-        SLANG_ASSERT(subObjectIndex < objects.getCount());
-        if(subObjectIndex >= objects.getCount())
-            return SLANG_E_INVALID_ARG;
-
-        if (subObjectIndex >= objects.getCount())
-        {
-            *object = nullptr;
+        if (!resourceView)
             return SLANG_OK;
-        }
-        returnComPtr(object, objects[subObjectIndex]);
-        return SLANG_OK;
-    }
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        setObject(ShaderOffset const& offset, IShaderObject* object) override
-    {
-        auto layout = getLayout();
-
-        auto bindingRangeIndex = offset.bindingRangeIndex;
-        SLANG_ASSERT(bindingRangeIndex >= 0);
-        SLANG_ASSERT(bindingRangeIndex < layout->m_bindingRanges.getCount());
-
-        auto& bindingRange = layout->m_bindingRanges[bindingRangeIndex];
-
-        auto subObjectIndex = bindingRange.baseIndex + offset.bindingArrayIndex;
-        auto subObject = dynamic_cast<CUDAShaderObject*>(object);
-
-        // TODO: We should really not need to retain the objects here
-        objects[subObjectIndex] = subObject;
-
-        switch( bindingRange.bindingType )
-        {
-        default:
-            SLANG_RETURN_ON_FAIL(setData(offset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
-            break;
-
-        // If the range being assigned into represents an interface/existential-type leaf field,
-        // then we need to consider how the `object` being assigned here affects specialization.
-        // We may also need to assign some data from the sub-object into the ordinary data
-        // buffer for the parent object.
-        //
-        case slang::BindingType::ExistentialValue:
-            {
-                auto renderer = getRenderer();
-
-                ComPtr<slang::ISession> slangSession;
-                SLANG_RETURN_ON_FAIL(renderer->getSlangSession(slangSession.writeRef()));
-
-                // A leaf field of interface type is laid out inside of the parent object
-                // as a tuple of `(RTTI, WitnessTable, Payload)`. The layout of these fields
-                // is a contract between the compiler and any runtime system, so we will
-                // need to rely on details of the binary layout.
-
-                // We start by querying the layout/type of the concrete value that the application
-                // is trying to store into the field, and also the layout/type of the leaf
-                // existential-type field itself.
-                //
-                auto concreteTypeLayout = subObject->getElementTypeLayout();
-                auto concreteType = concreteTypeLayout->getType();
-                //
-                auto existentialTypeLayout = layout->getElementTypeLayout()->getBindingRangeLeafTypeLayout(bindingRangeIndex);
-                auto existentialType = existentialTypeLayout->getType();
-
-                // The first field of the tuple (offset zero) is the run-time type information (RTTI)
-                // ID for the concrete type being stored into the field.
-                //
-                // TODO: We need to be able to gather the RTTI type ID from `object` and then
-                // use `setData(offset, &TypeID, sizeof(TypeID))`.
-
-                // The second field of the tuple (offset 8) is the ID of the "witness" for the
-                // conformance of the concrete type to the interface used by this field.
-                //
-                auto witnessTableOffset = offset;
-                witnessTableOffset.uniformOffset += 8;
-                //
-                // Conformances of a type to an interface are computed and then stored by the
-                // Slang runtime, so we can look up the ID for this particular conformance (which
-                // will create it on demand).
-                //
-                // Note: If the type doesn't actually conform to the required interface for
-                // this sub-object range, then this is the point where we will detect that
-                // fact and error out.
-                //
-                uint32_t conformanceID = 0xFFFFFFFF;
-                SLANG_RETURN_ON_FAIL(slangSession->getTypeConformanceWitnessSequentialID(
-                    concreteType, existentialType, &conformanceID));
-                //
-                // Once we have the conformance ID, then we can write it into the object
-                // at the required offset.
-                //
-                SLANG_RETURN_ON_FAIL(setData(witnessTableOffset, &conformanceID, sizeof(conformanceID)));
-
-                // The third field of the tuple (offset 16) is the "payload" that is supposed to
-                // hold the data for a value of the given concrete type.
-                //
-                auto payloadOffset = offset;
-                payloadOffset.uniformOffset += 16;
-
-                // There are two cases we need to consider here for how the payload might be used:
-                //
-                // * If the concrete type of the value being bound is one that can "fit" into the
-                //   available payload space,  then it should be stored in the payload.
-                //
-                // * If the concrete type of the value cannot fit in the payload space, then it
-                //   will need to be stored somewhere else.
-                //
-                if(_doesValueFitInExistentialPayload(concreteTypeLayout, existentialTypeLayout))
-                {
-                    // If the value can fit in the payload area, then we will go ahead and copy
-                    // its bytes into that area.
-                    //
-                    auto valueSize = concreteTypeLayout->getSize();
-                    SLANG_RETURN_ON_FAIL(setDeviceData(payloadOffset.uniformOffset, subObject->getBuffer(), valueSize));
-                }
-                else
-                {
-                    // If the value cannot fit in the payload area, then we will pass a pointer
-                    // to the sub-object instead.
-                    //
-                    // Note: The Slang compiler does not currently emit code that handles the
-                    // pointer case, but that is the expected implementation for values
-                    // that do not fit into the fixed-size payload.
-                    //
-                    SLANG_RETURN_ON_FAIL(setData(payloadOffset, &subObject->bufferResource->m_cudaMemory, sizeof(void*)));
-                }
-            }
-            break;
-        }
 
-        return SLANG_OK;
-    }
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        setResource(ShaderOffset const& offset, IResourceView* resourceView) override
-    {
         auto layout = getLayout();
 
         auto bindingRangeIndex = offset.bindingRangeIndex;
@@ -678,7 +574,7 @@ public:
                 setData(offset, &handle, sizeof(uint64_t));
             }
         }
-        else
+        else if (cudaView->memoryResource)
         {
             auto handle = cudaView->memoryResource->getBindlessHandle();
             setData(offset, &handle, sizeof(handle));
@@ -689,7 +585,42 @@ public:
             if (desc.elementSize > 1)
                 size /= desc.elementSize;
             setData(sizeOffset, &size, sizeof(size));
+        }
+        else if (cudaView->proxyBuffer)
+        {
+            auto handle = cudaView->proxyBuffer;
+            setData(offset, &handle, sizeof(handle));
+            auto sizeOffset = offset;
+            sizeOffset.uniformOffset += sizeof(handle);
+            auto& desc = *cudaView->memoryResource->getDesc();
+            size_t size = desc.sizeInBytes;
+            if (desc.elementSize > 1)
+                size /= desc.elementSize;
+            setData(sizeOffset, &size, sizeof(size));
+        }
+        return SLANG_OK;
+    }
+    virtual SLANG_NO_THROW Result SLANG_MCALL
+        setObject(ShaderOffset const& offset, IShaderObject* object) override
+    {
+        SLANG_RETURN_ON_FAIL(Super::setObject(offset, object));
 
+        auto bindingRangeIndex = offset.bindingRangeIndex;
+        auto& bindingRange = getLayout()->m_bindingRanges[bindingRangeIndex];
+
+        CUDAShaderObject* subObject = static_cast<CUDAShaderObject*>(object);
+        switch (bindingRange.bindingType)
+        {
+        default:
+            {
+                void* subObjectDataBuffer = subObject->getBuffer();
+                SLANG_RETURN_ON_FAIL(setData(offset, &subObjectDataBuffer, sizeof(void*)));
+            }
+            break;
+        case slang::BindingType::ExistentialValue:
+        case slang::BindingType::RawBuffer:
+        case slang::BindingType::MutableRawBuffer:
+            break;
         }
         return SLANG_OK;
     }
@@ -707,107 +638,12 @@ public:
         setResource(offset, textureView);
         return SLANG_OK;
     }
-
-    // Appends all types that are used to specialize the element type of this shader object in `args` list.
-    virtual Result collectSpecializationArgs(ExtendedShaderObjectTypeList& args) override
-    {
-        // TODO: the logic here is a copy-paste of `GraphicsCommonShaderObject::collectSpecializationArgs`,
-        // consider moving the implementation to `ShaderObjectBase` and share the logic among different implementations.
-
-        auto& subObjectRanges = getLayout()->subObjectRanges;
-        // The following logic is built on the assumption that all fields that involve existential types (and
-        // therefore require specialization) will results in a sub-object range in the type layout.
-        // This allows us to simply scan the sub-object ranges to find out all specialization arguments.
-        for (Index subObjIndex = 0; subObjIndex < subObjectRanges.getCount(); subObjIndex++)
-        {
-            // Retrieve the corresponding binding range of the sub object.
-            auto bindingRange = getLayout()->m_bindingRanges[subObjectRanges[subObjIndex].bindingRangeIndex];
-            switch (bindingRange.bindingType)
-            {
-            case slang::BindingType::ExistentialValue:
-            {
-                // A binding type of `ExistentialValue` means the sub-object represents a interface-typed field.
-                // In this case the specialization argument for this field is the actual specialized type of the bound
-                // shader object. If the shader object's type is an ordinary type without existential fields, then the
-                // type argument will simply be the ordinary type. But if the sub object's type is itself a specialized
-                // type, we need to make sure to use that type as the specialization argument.
-
-                // TODO: need to implement the case where the field is an array of existential values.
-                ExtendedShaderObjectType specializedSubObjType;
-                SLANG_RETURN_ON_FAIL(objects[subObjIndex]->getSpecializedShaderObjectType(&specializedSubObjType));
-                args.add(specializedSubObjType);
-                break;
-            }
-            case slang::BindingType::ParameterBlock:
-            case slang::BindingType::ConstantBuffer:
-                // Currently we only handle the case where the field's type is
-                // `ParameterBlock<SomeStruct>` or `ConstantBuffer<SomeStruct>`, where `SomeStruct` is a struct type
-                // (not directly an interface type). In this case, we just recursively collect the specialization arguments
-                // from the bound sub object.
-                SLANG_RETURN_ON_FAIL(objects[subObjIndex]->collectSpecializationArgs(args));
-                // TODO: we need to handle the case where the field is of the form `ParameterBlock<IFoo>`. We should treat
-                // this case the same way as the `ExistentialValue` case here, but currently we lack a mechanism to distinguish
-                // the two scenarios.
-                break;
-            }
-            // TODO: need to handle another case where specialization happens on resources fields e.g. `StructuredBuffer<IFoo>`.
-        }
-        return SLANG_OK;
-    }
 };
 
 class CUDAEntryPointShaderObject : public CUDAShaderObject
 {
 public:
-    void* hostBuffer = nullptr;
-    size_t uniformBufferSize = 0;
-    // Override buffer allocation so we store all uniform data on host memory instead of device memory.
-    virtual SLANG_NO_THROW Result SLANG_MCALL initBuffer(IDevice* device, size_t bufferSize) override
-    {
-        SLANG_UNUSED(device);
-        uniformBufferSize = bufferSize;
-        hostBuffer = malloc(bufferSize);
-        return SLANG_OK;
-    }
-
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        setData(ShaderOffset const& offset, void const* data, size_t size) override
-    {
-        size = Math::Min(size, uniformBufferSize - offset.uniformOffset);
-        memcpy(
-            (uint8_t*)hostBuffer + offset.uniformOffset,
-            data,
-            size);
-        return SLANG_OK;
-    }
-
-    virtual SLANG_NO_THROW Result SLANG_MCALL
-        setDeviceData(size_t offset, void* data, size_t size) override
-    {
-        size = Math::Min(size, uniformBufferSize - offset);
-        SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(
-            (uint8_t*)hostBuffer + offset,
-            data,
-            size,
-            cudaMemcpyDeviceToHost));
-        return SLANG_OK;
-    }
-
-
-    virtual SLANG_NO_THROW void* SLANG_MCALL getBuffer() override
-    {
-        return hostBuffer;
-    }
-
-    virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize() override
-    {
-        return uniformBufferSize;
-    }
-
-    ~CUDAEntryPointShaderObject()
-    {
-        free(hostBuffer);
-    }
+    CUDAEntryPointShaderObject() { m_data.isHostOnly = true; }
 };
 
 class CUDARootShaderObject : public CUDAShaderObject
@@ -1233,15 +1069,12 @@ public:
                     currentPipeline->shaderProgram->cudaModule,
                     "SLANG_globalParams");
 
-                CUdeviceptr globalParamsCUDAData =
-                    currentRootObject->bufferResource
-                        ? (CUdeviceptr)currentRootObject->bufferResource->getBindlessHandle()
-                        : 0;
+                CUdeviceptr globalParamsCUDAData = (CUdeviceptr)currentRootObject->getBuffer();
                 cudaMemcpyAsync(
                     (void*)globalParamsSymbol,
                     (void*)globalParamsCUDAData,
                     globalParamsSymbolSize,
-                    cudaMemcpyDeviceToDevice,
+                    cudaMemcpyDefault,
                     0);
             }
             //
@@ -1818,7 +1651,7 @@ public:
         SLANG_CUDA_RETURN_ON_FAIL(cudaMallocManaged(&resource->m_cudaMemory, desc.sizeInBytes));
         if (initData)
         {
-            SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(resource->m_cudaMemory, initData, desc.sizeInBytes, cudaMemcpyHostToDevice));
+            SLANG_CUDA_RETURN_ON_FAIL(cudaMemcpy(resource->m_cudaMemory, initData, desc.sizeInBytes, cudaMemcpyDefault));
         }
         returnComPtr(outResource, resource);
         return SLANG_OK;
@@ -2080,7 +1913,7 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
     size_t uniformSize = slangLayout->getSize();
     if (uniformSize)
     {
-        initBuffer(device, uniformSize);
+        m_data.setCount((Index)uniformSize);
     }
 
     // If the layout specifies that we have any resources or sub-objects,
@@ -2090,10 +1923,7 @@ SlangResult CUDAShaderObject::init(IDevice* device, CUDAShaderObjectLayout* type
     // and not just the number of resource/sub-object ranges.
     //
     resources.setCount(typeLayout->getResourceCount());
-    objects.setCount(typeLayout->getSubObjectCount());
-
-    Index subObjectCount = slangLayout->getSubObjectRangeCount();
-    objects.setCount(subObjectCount);
+    m_objects.setCount(typeLayout->getSubObjectCount());
 
     for (auto subObjectRange : getLayout()->subObjectRanges)
     {