summaryrefslogtreecommitdiffstats
path: root/tools/gfx
diff options
context:
space:
mode:
authorjsmall-nvidia <jsmall@nvidia.com>2020-08-21 16:04:42 -0400
committerGitHub <noreply@github.com>2020-08-21 13:04:42 -0700
commitfcac02e405661de311b5ceebbd6d3e2c78bf8aea (patch)
tree6e79865b39f0739d2ac9c3f91cc4129c244b6977 /tools/gfx
parent49067fd2e97b40649df3fa2ce096f78c2e45da5a (diff)
Vulkan update/NVAPI support (#1511)
* First pass at incorporating nvapi into test harness. * D3d12 Atomic Float Add via NVAPI working * Dx12 atomic float appears to work. * Atomic float add on Dx12. * Added atomic64 feature addition to vk. Fix correct output for atomic-float-byte-address.slang * Disable atomic float failing tests. * Upgraded VK headers. * Detect atomic float availability on VK. * Try to get test working for int64 atomic. * Made HLSL prelude controlled via the render-test requirements. * Added -enable-nvapi to premake. * Fix D3D12Renderer when NVAPI is not available. * Small improvements to VKRenderer. * Improve atomic documentation in target-compatibility.md.
Diffstat (limited to 'tools/gfx')
-rw-r--r--tools/gfx/d3d11/render-d3d11.cpp101
-rw-r--r--tools/gfx/d3d12/render-d3d12.cpp74
-rw-r--r--tools/gfx/gfx.vcxproj3
-rw-r--r--tools/gfx/gfx.vcxproj.filters9
-rw-r--r--tools/gfx/nvapi/nvapi-include.h19
-rw-r--r--tools/gfx/nvapi/nvapi-util.cpp30
-rw-r--r--tools/gfx/nvapi/nvapi-util.h19
-rw-r--r--tools/gfx/open-gl/render-gl.cpp7
-rw-r--r--tools/gfx/render.h7
-rw-r--r--tools/gfx/vulkan/render-vk.cpp77
10 files changed, 314 insertions, 32 deletions
diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp
index 4eba4edaf..8eafd24b4 100644
--- a/tools/gfx/d3d11/render-d3d11.cpp
+++ b/tools/gfx/d3d11/render-d3d11.cpp
@@ -7,6 +7,7 @@
//WORKING: #include "options.h"
#include "../render.h"
#include "../d3d/d3d-util.h"
+#include "../nvapi/nvapi-util.h"
#include "../surface.h"
@@ -29,6 +30,13 @@
#include <d3d11_2.h>
#include <d3dcompiler.h>
+#ifdef GFX_NVAPI
+// NVAPI integration is described here
+// https://developer.nvidia.com/unlocking-gpu-intrinsics-hlsl
+
+# include "../nvapi/nvapi-include.h"
+#endif
+
// We will use the C standard library just for printing error messages.
#include <stdio.h>
@@ -52,6 +60,7 @@ public:
kMaxRTVs = 8,
};
+
// Renderer implementation
virtual SlangResult initialize(const Desc& desc, void* inWindowHandle) override;
virtual const List<String>& getFeatures() override { return m_features; }
@@ -102,6 +111,17 @@ public:
protected:
+ class ScopeNVAPI
+ {
+ public:
+ ScopeNVAPI() : m_renderer(nullptr) {}
+ SlangResult init(D3D11Renderer* renderer, Index regIndex);
+ ~ScopeNVAPI();
+
+ protected:
+ D3D11Renderer* m_renderer;
+ };
+
#if 0
struct BindingDetail
{
@@ -124,6 +144,7 @@ public:
};
#endif
+
enum class D3D11DescriptorSlotType
{
ConstantBuffer,
@@ -391,6 +412,8 @@ public:
float m_clearColor[4] = { 0, 0, 0, 0 };
List<String> m_features;
+
+ bool m_nvapi = false;
};
Renderer* createD3D11Renderer()
@@ -398,6 +421,44 @@ Renderer* createD3D11Renderer()
return new D3D11Renderer();
}
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!ScopeNVAPI !!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SlangResult D3D11Renderer::ScopeNVAPI::init(D3D11Renderer* renderer, Index regIndex)
+{
+ if (!renderer->m_nvapi)
+ {
+ // There is nothing to set as nvapi is not set
+ return SLANG_OK;
+ }
+
+#ifdef GFX_NVAPI
+ NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(renderer->m_device, NvU32(regIndex));
+ if (nvapiStatus != NVAPI_OK)
+ {
+ return SLANG_FAIL;
+ }
+#endif
+
+ // Record the renderer so it can be freed
+ m_renderer = renderer;
+ return SLANG_OK;
+}
+
+D3D11Renderer::ScopeNVAPI::~ScopeNVAPI()
+{
+ // If the m_renderer is not set, it must not have been set up
+ if (m_renderer)
+ {
+#ifdef GFX_NVAPI
+ // Disable the slot used
+ NvAPI_Status nvapiStatus = NvAPI_D3D11_SetNvShaderExtnSlot(m_renderer->m_device, ~0);
+ SLANG_ASSERT(nvapiStatus == NVAPI_OK);
+#endif
+ }
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!D3D11Renderer !!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
/* static */HRESULT D3D11Renderer::captureTextureToSurface(ID3D11Device* device, ID3D11DeviceContext* context, ID3D11Texture2D* texture, Surface& surfaceOut)
{
if (!context) return E_INVALIDARG;
@@ -585,6 +646,30 @@ SlangResult D3D11Renderer::initialize(const Desc& desc, void* inWindowHandle)
SLANG_ASSERT(m_immediateContext && m_swapChain && m_device);
}
+ // NVAPI
+ {
+ const char* features[] = { "nvapi", "atomic-float", "atomic-int64" };
+ bool needsNvapi = false;
+ for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ {
+ if (desc.requiredFeatures.indexOf(features[i]) >= 0)
+ {
+ needsNvapi = true;
+ break;
+ }
+ }
+
+ if (needsNvapi && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+ {
+ // TODO(JS): We should test for specific features here.
+ for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ {
+ m_features.add(features[i]);
+ }
+ m_nvapi = true;
+ }
+ }
+
// TODO: Add support for debugging to help detect leaks:
//
// ComPtr<ID3D11Debug> gDebug;
@@ -1505,7 +1590,12 @@ Result D3D11Renderer::createProgram(const ShaderProgram::Desc& desc, ShaderProgr
auto computeKernel = desc.findKernel(StageType::Compute);
ComPtr<ID3D11ComputeShader> computeShader;
- SLANG_RETURN_ON_FAIL(m_device->CreateComputeShader(computeKernel->codeBegin, computeKernel->getCodeSize(), nullptr, computeShader.writeRef()));
+
+ {
+ ScopeNVAPI scopeNVAPI;
+ SLANG_RETURN_ON_FAIL(scopeNVAPI.init(this, 0));
+ SLANG_RETURN_ON_FAIL(m_device->CreateComputeShader(computeKernel->codeBegin, computeKernel->getCodeSize(), nullptr, computeShader.writeRef()));
+ }
RefPtr<ShaderProgramImpl> shaderProgram = new ShaderProgramImpl();
shaderProgram->m_computeShader.swap(computeShader);
@@ -1521,8 +1611,13 @@ Result D3D11Renderer::createProgram(const ShaderProgram::Desc& desc, ShaderProgr
ComPtr<ID3D11VertexShader> vertexShader;
ComPtr<ID3D11PixelShader> pixelShader;
- SLANG_RETURN_ON_FAIL(m_device->CreateVertexShader(vertexKernel->codeBegin, vertexKernel->getCodeSize(), nullptr, vertexShader.writeRef()));
- SLANG_RETURN_ON_FAIL(m_device->CreatePixelShader(fragmentKernel->codeBegin, fragmentKernel->getCodeSize(), nullptr, pixelShader.writeRef()));
+ {
+ ScopeNVAPI scopeNVAPI;
+ SLANG_RETURN_ON_FAIL(scopeNVAPI.init(this, 0));
+
+ SLANG_RETURN_ON_FAIL(m_device->CreateVertexShader(vertexKernel->codeBegin, vertexKernel->getCodeSize(), nullptr, vertexShader.writeRef()));
+ SLANG_RETURN_ON_FAIL(m_device->CreatePixelShader(fragmentKernel->codeBegin, fragmentKernel->getCodeSize(), nullptr, pixelShader.writeRef()));
+ }
RefPtr<ShaderProgramImpl> shaderProgram = new ShaderProgramImpl();
shaderProgram->m_vertexShader.swap(vertexShader);
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp
index ba9bde63a..ca44aa04d 100644
--- a/tools/gfx/d3d12/render-d3d12.cpp
+++ b/tools/gfx/d3d12/render-d3d12.cpp
@@ -23,7 +23,16 @@
#include <dxgi1_4.h>
#include <d3d12.h>
-#include <d3dcompiler.h>
+//#include <d3dcompiler.h>
+
+#ifndef __ID3D12GraphicsCommandList1_FWD_DEFINED__
+// If we can't find a definition of CommandList1, just use an empty definition
+struct ID3D12GraphicsCommandList1 {};
+#endif
+
+#ifdef GFX_NVAPI
+# include "../nvapi/nvapi-include.h"
+#endif
#include "../../slang-com-ptr.h"
#include "../flag-combiner.h"
@@ -34,6 +43,8 @@
#include "../d3d/d3d-util.h"
+#include "../nvapi/nvapi-util.h"
+
// We will use the C standard library just for printing error messages.
#include <stdio.h>
@@ -690,6 +701,8 @@ protected:
HWND m_hwnd = nullptr;
List<String> m_features;
+
+ bool m_nvapi = false;
};
Renderer* createD3D12Renderer()
@@ -1589,6 +1602,30 @@ Result D3D12Renderer::initialize(const Desc& desc, void* inWindowHandle)
return SLANG_FAIL;
}
+ // NVAPI
+ {
+ const char* features[] = { "nvapi", "atomic-float", "atomic-int64" };
+ bool needsNvapi = false;
+ for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ {
+ if (desc.requiredFeatures.indexOf(features[i]) >= 0)
+ {
+ needsNvapi = true;
+ break;
+ }
+ }
+
+ if (needsNvapi && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+ {
+ // TODO(JS): We should test for specific features here.
+ for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ {
+ m_features.add(features[i]);
+ }
+ m_nvapi = true;
+ }
+ }
+
// Set the device
m_device = m_deviceInfo.m_device;
@@ -3209,7 +3246,7 @@ void D3D12Renderer::DescriptorSetImpl::setRootConstants(
// have been a root-constant range for this call to be
// valid.
//
- SLANG_ASSERT(range < m_layout->m_ranges.getCount());
+ SLANG_ASSERT(range < UInt(m_layout->m_ranges.getCount()));
auto& rangeInfo = m_layout->m_ranges[range];
SLANG_ASSERT(rangeInfo.type == DescriptorSlotType::RootConstant);
@@ -3222,7 +3259,7 @@ void D3D12Renderer::DescriptorSetImpl::setRootConstants(
SLANG_ASSERT(rootConstantIndex >= 0);
SLANG_ASSERT(rootConstantIndex < m_layout->m_rootConstantRanges.getCount());
auto& rootConstantRangeInfo = m_layout->m_rootConstantRanges[rootConstantIndex];
- SLANG_ASSERT(offset + size <= rootConstantRangeInfo.size);
+ SLANG_ASSERT(offset + size <= UInt(rootConstantRangeInfo.size));
memcpy((char*)m_rootConstantData.getBuffer() + rootConstantRangeInfo.offset + offset, data, size);
}
@@ -3951,7 +3988,36 @@ Result D3D12Renderer::createComputePipelineState(const ComputePipelineStateDesc&
computeDesc.CS = { programImpl->m_computeShader.getBuffer(), SIZE_T(programImpl->m_computeShader.getCount()) };
ComPtr<ID3D12PipelineState> pipelineState;
- SLANG_RETURN_ON_FAIL(m_device->CreateComputePipelineState(&computeDesc, IID_PPV_ARGS(pipelineState.writeRef())));
+
+#ifdef GFX_NVAPI
+ if (m_nvapi)
+ {
+ // Also fill the extension structure.
+ // Use the same UAV slot index and register space that are declared in the shader.
+
+ // For simplicity's sake we just use u0
+ NVAPI_D3D12_PSO_SET_SHADER_EXTENSION_SLOT_DESC extensionDesc;
+ extensionDesc.baseVersion = NV_PSO_EXTENSION_DESC_VER;
+ extensionDesc.version = NV_SET_SHADER_EXTENSION_SLOT_DESC_VER;
+ extensionDesc.uavSlot = 0;
+ extensionDesc.registerSpace = 0;
+
+ // Put the pointer to the extension into an array - there can be multiple extensions enabled at once.
+ const NVAPI_D3D12_PSO_EXTENSION_DESC* extensions[] = { &extensionDesc };
+
+ // Now create the PSO.
+ const NvAPI_Status nvapiStatus = NvAPI_D3D12_CreateComputePipelineState(m_device, &computeDesc, SLANG_COUNT_OF(extensions), extensions, pipelineState.writeRef());
+
+ if (nvapiStatus != NVAPI_OK)
+ {
+ return SLANG_FAIL;
+ }
+ }
+ else
+#endif
+ {
+ SLANG_RETURN_ON_FAIL(m_device->CreateComputePipelineState(&computeDesc, IID_PPV_ARGS(pipelineState.writeRef())));
+ }
RefPtr<PipelineStateImpl> pipelineStateImpl = new PipelineStateImpl();
pipelineStateImpl->m_pipelineType = PipelineType::Compute;
diff --git a/tools/gfx/gfx.vcxproj b/tools/gfx/gfx.vcxproj
index d4b1885a8..327f6d629 100644
--- a/tools/gfx/gfx.vcxproj
+++ b/tools/gfx/gfx.vcxproj
@@ -180,6 +180,8 @@
<ClInclude Include="flag-combiner.h" />
<ClInclude Include="gui.h" />
<ClInclude Include="model.h" />
+ <ClInclude Include="nvapi\nvapi-include.h" />
+ <ClInclude Include="nvapi\nvapi-util.h" />
<ClInclude Include="open-gl\render-gl.h" />
<ClInclude Include="render.h" />
<ClInclude Include="surface.h" />
@@ -202,6 +204,7 @@
<ClCompile Include="flag-combiner.cpp" />
<ClCompile Include="gui.cpp" />
<ClCompile Include="model.cpp" />
+ <ClCompile Include="nvapi\nvapi-util.cpp" />
<ClCompile Include="open-gl\render-gl.cpp" />
<ClCompile Include="render.cpp" />
<ClCompile Include="surface.cpp" />
diff --git a/tools/gfx/gfx.vcxproj.filters b/tools/gfx/gfx.vcxproj.filters
index 1c27e4f24..f5fba2295 100644
--- a/tools/gfx/gfx.vcxproj.filters
+++ b/tools/gfx/gfx.vcxproj.filters
@@ -36,6 +36,12 @@
<ClInclude Include="model.h">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="nvapi\nvapi-include.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="nvapi\nvapi-util.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
<ClInclude Include="open-gl\render-gl.h">
<Filter>Header Files</Filter>
</ClInclude>
@@ -98,6 +104,9 @@
<ClCompile Include="model.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="nvapi\nvapi-util.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="open-gl\render-gl.cpp">
<Filter>Source Files</Filter>
</ClCompile>
diff --git a/tools/gfx/nvapi/nvapi-include.h b/tools/gfx/nvapi/nvapi-include.h
new file mode 100644
index 000000000..e3674af95
--- /dev/null
+++ b/tools/gfx/nvapi/nvapi-include.h
@@ -0,0 +1,19 @@
+// nvapi-include.h
+#pragma once
+
+// A helper that makes the NVAPI available across targets
+
+#ifdef GFX_NVAPI
+// On windows if we include NVAPI, we must include windows.h first
+
+# ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# define NOMINMAX
+# include <Windows.h>
+# undef WIN32_LEAN_AND_MEAN
+# undef NOMINMAX
+# endif
+
+# include <nvapi.h>
+#endif
+
diff --git a/tools/gfx/nvapi/nvapi-util.cpp b/tools/gfx/nvapi/nvapi-util.cpp
new file mode 100644
index 000000000..63bcc65fc
--- /dev/null
+++ b/tools/gfx/nvapi/nvapi-util.cpp
@@ -0,0 +1,30 @@
+#include "nvapi-util.h"
+
+#include "nvapi-include.h"
+
+namespace gfx {
+
+static SlangResult g_initStatus = SLANG_E_UNINITIALIZED;
+
+/* static */SlangResult NVAPIUtil::initialize()
+{
+#ifdef GFX_NVAPI
+ if (g_initStatus == SLANG_E_UNINITIALIZED)
+ {
+ NvAPI_Status ret = NVAPI_OK;
+ ret = NvAPI_Initialize();
+ g_initStatus = (ret == NVAPI_OK) ? SLANG_OK : SLANG_E_NOT_AVAILABLE;
+ }
+#else
+ g_initStatus = SLANG_E_NOT_AVAILABLE;
+#endif
+
+ return g_initStatus;
+}
+
+/* static */bool NVAPIUtil::isAvailable()
+{
+ return SLANG_SUCCEEDED(g_initStatus);
+}
+
+} // gfx
diff --git a/tools/gfx/nvapi/nvapi-util.h b/tools/gfx/nvapi/nvapi-util.h
new file mode 100644
index 000000000..704f4ede4
--- /dev/null
+++ b/tools/gfx/nvapi/nvapi-util.h
@@ -0,0 +1,19 @@
+// nvapi-util.h
+#pragma once
+
+#include "../../slang-com-helper.h"
+#include "../../slang-com-ptr.h"
+
+namespace gfx {
+
+struct NVAPIUtil
+{
+ /// Set up NVAPI for use. Must be called before any other function is used.
+ static SlangResult initialize();
+ /// True if the NVAPI is available, can be called even if initialize fails.
+ /// If initialize has not been called will return false
+ static bool isAvailable();
+};
+
+
+} // gfx
diff --git a/tools/gfx/open-gl/render-gl.cpp b/tools/gfx/open-gl/render-gl.cpp
index ee3977a74..5f16f07bc 100644
--- a/tools/gfx/open-gl/render-gl.cpp
+++ b/tools/gfx/open-gl/render-gl.cpp
@@ -1,6 +1,8 @@
// render-gl.cpp
#include "render-gl.h"
+#include "../nvapi/nvapi-util.h"
+
//WORKING:#include "options.h"
#include "../render.h"
@@ -715,6 +717,11 @@ SlangResult GLRenderer::initialize(const Desc& desc, void* inWindowHandle)
}
}
+ if (m_desc.requiredFeatures.indexOf("nvapi") >= 0 && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+ {
+ m_features.add("nvapi");
+ }
+
auto extensions = glGetString(GL_EXTENSIONS);
// Load each of our extension functions by name
diff --git a/tools/gfx/render.h b/tools/gfx/render.h
index 051b19742..12ef1a9e9 100644
--- a/tools/gfx/render.h
+++ b/tools/gfx/render.h
@@ -800,9 +800,10 @@ public:
struct Desc
{
- int width; ///< Width in pixels
- int height; ///< height in pixels
- Slang::String adapter; ///< Name to identify the adapter to use
+ int width; ///< Width in pixels
+ int height; ///< height in pixels
+ Slang::String adapter; ///< Name to identify the adapter to use
+ Slang::List<Slang::String> requiredFeatures; ///< The features enabled on this renderer
};
virtual SlangResult initialize(const Desc& desc, void* inWindowHandle) = 0;
diff --git a/tools/gfx/vulkan/render-vk.cpp b/tools/gfx/vulkan/render-vk.cpp
index b9cc82469..28567a3b8 100644
--- a/tools/gfx/vulkan/render-vk.cpp
+++ b/tools/gfx/vulkan/render-vk.cpp
@@ -1025,31 +1025,40 @@ SlangResult VKRenderer::initialize(const Desc& desc, void* inWindowHandle)
const uint32_t majorVersion = VK_VERSION_MAJOR(basicProps.apiVersion);
const uint32_t minorVersion = VK_VERSION_MINOR(basicProps.apiVersion);
- // Float16 features
// Need in this scope because it will be linked into the device creation (if it is available)
+
+ // Float16 features
VkPhysicalDeviceFloat16Int8FeaturesKHR float16Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR };
+ // AtomicInt64 features
+ VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomicInt64Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR };
+ // Atomic Float features
+ VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomicFloatFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT };
// API version check, can't use vkGetPhysicalDeviceProperties2 yet since this device might not support it
if (VK_MAKE_VERSION(majorVersion, minorVersion, 0) >= VK_API_VERSION_1_1 &&
m_api.vkGetPhysicalDeviceProperties2 &&
m_api.vkGetPhysicalDeviceFeatures2)
{
- VkPhysicalDeviceProperties2 physicalDeviceProps2;
-
- physicalDeviceProps2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
- physicalDeviceProps2.pNext = nullptr;
- physicalDeviceProps2.properties = {};
-
- m_api.vkGetPhysicalDeviceProperties2(m_api.m_physicalDevice, &physicalDeviceProps2);
// Get device features
VkPhysicalDeviceFeatures2 deviceFeatures2 = {};
deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- // Link together for lookup
+ // Float16
float16Features.pNext = deviceFeatures2.pNext;
deviceFeatures2.pNext = &float16Features;
+ // Atomic64
+ atomicInt64Features.pNext = deviceFeatures2.pNext;
+ deviceFeatures2.pNext = &atomicInt64Features;
+
+ // Atomic Float
+ // To detect atomic float we need
+ // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkPhysicalDeviceShaderAtomicFloatFeaturesEXT.html
+
+ atomicFloatFeatures.pNext = deviceFeatures2.pNext;
+ deviceFeatures2.pNext = &atomicFloatFeatures;
+
m_api.vkGetPhysicalDeviceFeatures2(m_api.m_physicalDevice, &deviceFeatures2);
// If we have float16 features then enable
@@ -1064,7 +1073,27 @@ SlangResult VKRenderer::initialize(const Desc& desc, void* inWindowHandle)
// We have half support
m_features.add("half");
- }
+ }
+
+ if (atomicInt64Features.shaderBufferInt64Atomics)
+ {
+ // Link into the creation features
+ atomicInt64Features.pNext = (void*)deviceCreateInfo.pNext;
+ deviceCreateInfo.pNext = &atomicInt64Features;
+
+ deviceExtensions.add(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+ m_features.add("atomic-int64");
+ }
+
+ if (atomicFloatFeatures.shaderBufferFloat32AtomicAdd)
+ {
+ // Link into the creation features
+ atomicFloatFeatures.pNext = (void*)deviceCreateInfo.pNext;
+ deviceCreateInfo.pNext = &atomicFloatFeatures;
+
+ deviceExtensions.add(VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
+ m_features.add("atomic-float");
+ }
}
int queueFamilyIndex = m_api.findQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT);
@@ -2345,8 +2374,7 @@ Result VKRenderer::createDescriptorSetLayout(const DescriptorSetLayout::Desc& de
RefPtr<DescriptorSetLayoutImpl> descriptorSetLayoutImpl = new DescriptorSetLayoutImpl(m_api);
Slang::List<VkDescriptorSetLayoutBinding> dstBindings;
-
- uint32_t descriptorCountForTypes[VK_DESCRIPTOR_TYPE_RANGE_SIZE] = { 0, };
+ Slang::List<uint32_t> descriptorCountForTypes;
UInt rangeCount = desc.slotRangeCount;
for(UInt rr = 0; rr < rangeCount; ++rr)
@@ -2378,7 +2406,7 @@ Result VKRenderer::createDescriptorSetLayout(const DescriptorSetLayout::Desc& de
auto rootConstantRangeIndex = descriptorSetLayoutImpl->m_rootConstantRanges.getCount();
descriptorSetLayoutImpl->m_rootConstantRanges.add(rootConstantRangeInfo);
- // We will also add a `RangeInfo` to reprsent this
+ // We will also add a `RangeInfo` to represent this
// range, even though it doesn't map to a VK-level
// descriptor range.
//
@@ -2413,6 +2441,11 @@ Result VKRenderer::createDescriptorSetLayout(const DescriptorSetLayout::Desc& de
dstBinding.stageFlags = VK_SHADER_STAGE_ALL;
dstBinding.pImmutableSamplers = nullptr;
+ if (descriptorCountForTypes.getCount() <= dstDescriptorType)
+ {
+ descriptorCountForTypes.setCount(dstDescriptorType + 1);
+ }
+
descriptorCountForTypes[dstDescriptorType] += uint32_t(srcRange.count);
dstBindings.add(dstBinding);
@@ -2443,23 +2476,23 @@ Result VKRenderer::createDescriptorSetLayout(const DescriptorSetLayout::Desc& de
// Create a pool while we are at it, to allocate descriptor sets of this type.
- VkDescriptorPoolSize poolSizes[VK_DESCRIPTOR_TYPE_RANGE_SIZE];
- uint32_t poolSizeCount = 0;
- for (int ii = 0; ii < SLANG_COUNT_OF(descriptorCountForTypes); ++ii)
+ List<VkDescriptorPoolSize> poolSizes;
+ for (Index ii = 0; ii < descriptorCountForTypes.getCount(); ++ii)
{
auto descriptorCount = descriptorCountForTypes[ii];
if (descriptorCount > 0)
{
- poolSizes[poolSizeCount].type = VkDescriptorType(ii);
- poolSizes[poolSizeCount].descriptorCount = descriptorCount;
- poolSizeCount++;
+ VkDescriptorPoolSize poolSize;
+ poolSize.type = VkDescriptorType(ii);
+ poolSize.descriptorCount = descriptorCount;
+ poolSizes.add(poolSize);
}
}
VkDescriptorPoolCreateInfo descriptorPoolInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO };
descriptorPoolInfo.maxSets = 128; // TODO: actually pick a size.
- descriptorPoolInfo.poolSizeCount = poolSizeCount;
- descriptorPoolInfo.pPoolSizes = &poolSizes[0];
+ descriptorPoolInfo.poolSizeCount = uint32_t(poolSizes.getCount());
+ descriptorPoolInfo.pPoolSizes = poolSizes.getBuffer();
VkDescriptorPool descriptorPool = VK_NULL_HANDLE;
SLANG_VK_CHECK(m_api.vkCreateDescriptorPool(m_device, &descriptorPoolInfo, nullptr, &descriptorPool));
@@ -2712,7 +2745,7 @@ void VKRenderer::DescriptorSetImpl::setRootConstants(
SLANG_ASSERT(rootConstantIndex >= 0);
SLANG_ASSERT(rootConstantIndex < m_layout->m_rootConstantRanges.getCount());
auto& rootConstantRangeInfo = m_layout->m_rootConstantRanges[rootConstantIndex];
- SLANG_ASSERT(offset + size <= rootConstantRangeInfo.size);
+ SLANG_ASSERT(offset + size <= UInt(rootConstantRangeInfo.size));
memcpy(m_rootConstantData.getBuffer() + rootConstantRangeInfo.offset + offset, data, size);
}