summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jsmall-nvidia <jsmall@nvidia.com> 2020-08-24 14:26:27 -0400
committer: GitHub <noreply@github.com> 2020-08-24 11:26:27 -0700
commit: 67ca54997d445e15891965b8d77561b9d10bb18c (patch)
tree: 8719fb2ff6a42f24daef5ca77d046a55b56d5b98
parent: fcac02e405661de311b5ceebbd6d3e2c78bf8aea (diff)
NVAPI improvements (#1512)
* First pass at incorporating nvapi into test harness.
* D3d12 Atomic Float Add via NVAPI working
* Dx12 atomic float appears to work.
* Atomic float add on Dx12.
* Added atomic64 feature addition to vk. Fix correct output for atomic-float-byte-address.slang
* Disable atomic float failing tests.
* Upgraded VK headers.
* Detect atomic float availability on VK.
* Try to get test working for int64 atomic.
* Made HLSL prelude controlled via the render-test requirements.
* Added -enable-nvapi to premake.
* Fix D3D12Renderer when NVAPI is not available.
* Small improvements to VKRenderer.
* Improve atomic documentation in target-compatibility.md.
* Fixed NVAPI working on D3D12.
* Test for specific NVAPI features.
* Remove requiredFeatures from Renderer::Desc as was ignored. Tried to document more around nvapiExtnSlot.
* Readded requiredFeatures to Renderer::Desc
* Improve comments in the tests.
-rw-r--r-- tests/slang-extension/atomic-float-byte-address-buffer.slang | 14
-rw-r--r-- tests/slang-extension/atomic-int64-byte-address-buffer.slang | 9
-rw-r--r-- tools/gfx/d3d11/render-d3d11.cpp | 48
-rw-r--r-- tools/gfx/d3d12/render-d3d12.cpp | 57
-rw-r--r-- tools/gfx/nvapi/nvapi-include.h | 2
-rw-r--r-- tools/gfx/open-gl/render-gl.cpp | 7
-rw-r--r-- tools/gfx/render.h | 8
-rw-r--r-- tools/render-test/options.cpp | 4
-rw-r--r-- tools/render-test/options.h | 2
-rw-r--r-- tools/render-test/render-test-main.cpp | 33
10 files changed, 111 insertions, 73 deletions
diff --git a/tests/slang-extension/atomic-float-byte-address-buffer.slang b/tests/slang-extension/atomic-float-byte-address-buffer.slang
index a332ddeac..603b92d65 100644
--- a/tests/slang-extension/atomic-float-byte-address-buffer.slang
+++ b/tests/slang-extension/atomic-float-byte-address-buffer.slang
@@ -1,18 +1,14 @@
// atomic-float-byte-address-buffer.slang
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -output-using-type
-// Produces a different result
-//TEST(compute):COMPARE_COMPUTE_EX:-dx11 -slang -compute -render-features atomic-float -output-using-type -nvapi-register u0
-// Disabled because crashes currently on vulkan
-// https://vulkan.gpuinfo.org/listdevicescoverage.php?extension=VK_EXT_shader_atomic_float
+//TEST(compute):COMPARE_COMPUTE_EX:-dx11 -slang -compute -render-features atomic-float -output-using-type -nvapi-slot u0
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-float -output-using-type
-// Doesn't work on D3D12 for unknown reasons
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-register u0
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-register u0
+//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0
+//TEST(compute):COMPARE_COMPUTE_EX:-d3d12 -compute -use-dxil -render-features atomic-float -output-using-type -compile-arg -O2 -nvapi-slot u0
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type
-// We don't use this, but having this defined makes the 0 slot available if nvapi is going to be used
-// Only strictly necessary on the dx12 path
+// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used
+// Only strictly necessary on the D3D11/D3D12 paths
//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer
RWStructuredBuffer<int> nvapiBuffer;
diff --git a/tests/slang-extension/atomic-int64-byte-address-buffer.slang b/tests/slang-extension/atomic-int64-byte-address-buffer.slang
index 216e55dc2..628c675a2 100644
--- a/tests/slang-extension/atomic-int64-byte-address-buffer.slang
+++ b/tests/slang-extension/atomic-int64-byte-address-buffer.slang
@@ -4,14 +4,13 @@
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute
// No support for int64_t on fxc - we need SM6.0 and dxil
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-register u0
-// Doesn't work on current version of driver. The atomics have no effect perhaps meaning they aren't being decoded correctly
-//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-register u0 -compile-arg -O2
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2
//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute
-// We don't use this, but having this defined makes the 0 slot available if nvapi is going to be used
-// Only strictly necessary on the dx12 path
+// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used
+// Only strictly necessary on the D3D12 path
//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer
RWStructuredBuffer<int> nvapiBuffer;
diff --git a/tools/gfx/d3d11/render-d3d11.cpp b/tools/gfx/d3d11/render-d3d11.cpp
index 8eafd24b4..30c982ab3 100644
--- a/tools/gfx/d3d11/render-d3d11.cpp
+++ b/tools/gfx/d3d11/render-d3d11.cpp
@@ -514,6 +514,19 @@ D3D11Renderer::ScopeNVAPI::~ScopeNVAPI()
// !!!!!!!!!!!!!!!!!!!!!!!!!!!! Renderer interface !!!!!!!!!!!!!!!!!!!!!!!!!!
+static bool _isSupportedNVAPIOp(IUnknown* dev, uint32_t op) // Returns true only if NVAPI reports shader-extension opcode `op` as supported for `dev`.
+{
+#ifdef GFX_NVAPI
+ {
+ bool isSupported; // Out-parameter for the query below; only read when that query returns NVAPI_OK.
+ NvAPI_Status status = NvAPI_D3D11_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported);
+ return status == NVAPI_OK && isSupported; // A failed query is treated the same as "not supported".
+ }
+#else
+ return false; // Built without GFX_NVAPI: no opcode is ever reported as supported.
+#endif
+}
+
SlangResult D3D11Renderer::initialize(const Desc& desc, void* inWindowHandle)
{
auto windowHandle = (HWND)inWindowHandle;
@@ -647,29 +660,32 @@ SlangResult D3D11Renderer::initialize(const Desc& desc, void* inWindowHandle)
}
// NVAPI
+ if (desc.nvapiExtnSlot >= 0)
{
- const char* features[] = { "nvapi", "atomic-float", "atomic-int64" };
- bool needsNvapi = false;
- for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ if (SLANG_FAILED(NVAPIUtil::initialize()))
{
- if (desc.requiredFeatures.indexOf(features[i]) >= 0)
- {
- needsNvapi = true;
- break;
- }
+ return SLANG_E_NOT_AVAILABLE;
}
- if (needsNvapi && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+#ifdef GFX_NVAPI
+ if (NvAPI_D3D11_SetNvShaderExtnSlot(m_device, NvU32(desc.nvapiExtnSlot)) != NVAPI_OK)
{
- // TODO(JS): We should test for specific features here.
- for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
- {
- m_features.add(features[i]);
- }
- m_nvapi = true;
+ return SLANG_E_NOT_AVAILABLE;
}
- }
+ if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC ))
+ {
+ m_features.add("atomic-int64");
+ }
+ if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC))
+ {
+ m_features.add("atomic-float");
+ }
+
+ m_nvapi = true;
+#endif
+ }
+
// TODO: Add support for debugging to help detect leaks:
//
// ComPtr<ID3D11Debug> gDebug;
diff --git a/tools/gfx/d3d12/render-d3d12.cpp b/tools/gfx/d3d12/render-d3d12.cpp
index ca44aa04d..0f23d8dd4 100644
--- a/tools/gfx/d3d12/render-d3d12.cpp
+++ b/tools/gfx/d3d12/render-d3d12.cpp
@@ -1523,6 +1523,19 @@ Result D3D12Renderer::_createDevice(DeviceCheckFlags deviceCheckFlags, const Uno
return SLANG_OK;
}
+static bool _isSupportedNVAPIOp(ID3D12Device* dev, uint32_t op) // Returns true only if NVAPI reports shader-extension opcode `op` as supported for `dev`.
+{
+#ifdef GFX_NVAPI
+ {
+ bool isSupported; // Out-parameter for the query below; only read when that query returns NVAPI_OK.
+ NvAPI_Status status = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(dev, NvU32(op), &isSupported);
+ return status == NVAPI_OK && isSupported; // A failed query is treated the same as "not supported".
+ }
+#else
+ return false; // Built without GFX_NVAPI: no opcode is ever reported as supported.
+#endif
+}
+
Result D3D12Renderer::initialize(const Desc& desc, void* inWindowHandle)
{
m_hwnd = (HWND)inWindowHandle;
@@ -1602,32 +1615,40 @@ Result D3D12Renderer::initialize(const Desc& desc, void* inWindowHandle)
return SLANG_FAIL;
}
+ // Set the device
+ m_device = m_deviceInfo.m_device;
+
// NVAPI
+ if (desc.nvapiExtnSlot >= 0)
{
- const char* features[] = { "nvapi", "atomic-float", "atomic-int64" };
- bool needsNvapi = false;
- for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
+ if (SLANG_FAILED(NVAPIUtil::initialize()))
{
- if (desc.requiredFeatures.indexOf(features[i]) >= 0)
- {
- needsNvapi = true;
- break;
- }
+ return SLANG_E_NOT_AVAILABLE;
}
- if (needsNvapi && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+#ifdef GFX_NVAPI
+ // From the NVAPI docs: applications are expected to bind a null UAV to this slot.
+ // NOTE! We don't currently do this, but it doesn't seem to be a problem.
+
+ const NvAPI_Status status = NvAPI_D3D12_SetNvShaderExtnSlotSpace(m_device, NvU32(desc.nvapiExtnSlot), NvU32(0));
+
+ if (status != NVAPI_OK)
{
- // TODO(JS): We should test for specific features here.
- for (Index i = 0; i < SLANG_COUNT_OF(features); ++i)
- {
- m_features.add(features[i]);
- }
- m_nvapi = true;
+ return SLANG_E_NOT_AVAILABLE;
}
- }
- // Set the device
- m_device = m_deviceInfo.m_device;
+ if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_UINT64_ATOMIC))
+ {
+ m_features.add("atomic-int64");
+ }
+ if (_isSupportedNVAPIOp(m_device, NV_EXTN_OP_FP32_ATOMIC))
+ {
+ m_features.add("atomic-float");
+ }
+
+ m_nvapi = true;
+#endif
+ }
// Find what features are supported
{
diff --git a/tools/gfx/nvapi/nvapi-include.h b/tools/gfx/nvapi/nvapi-include.h
index e3674af95..c213e0bfb 100644
--- a/tools/gfx/nvapi/nvapi-include.h
+++ b/tools/gfx/nvapi/nvapi-include.h
@@ -15,5 +15,7 @@
# endif
# include <nvapi.h>
+# include <nvShaderExtnEnums.h>
+
#endif
diff --git a/tools/gfx/open-gl/render-gl.cpp b/tools/gfx/open-gl/render-gl.cpp
index 5f16f07bc..caf8794c0 100644
--- a/tools/gfx/open-gl/render-gl.cpp
+++ b/tools/gfx/open-gl/render-gl.cpp
@@ -717,9 +717,12 @@ SlangResult GLRenderer::initialize(const Desc& desc, void* inWindowHandle)
}
}
- if (m_desc.requiredFeatures.indexOf("nvapi") >= 0 && SLANG_SUCCEEDED(NVAPIUtil::initialize()))
+ if (m_desc.nvapiExtnSlot >= 0)
{
- m_features.add("nvapi");
+ if (SLANG_FAILED(NVAPIUtil::initialize()))
+ {
+ return SLANG_E_NOT_AVAILABLE;
+ }
}
auto extensions = glGetString(GL_EXTENSIONS);
diff --git a/tools/gfx/render.h b/tools/gfx/render.h
index 12ef1a9e9..55beb5774 100644
--- a/tools/gfx/render.h
+++ b/tools/gfx/render.h
@@ -800,12 +800,14 @@ public:
struct Desc
{
- int width; ///< Width in pixels
- int height; ///< height in pixels
+ int width = 0; ///< Width in pixels
+ int height = 0; ///< height in pixels
Slang::String adapter; ///< Name to identify the adapter to use
- Slang::List<Slang::String> requiredFeatures; ///< The features enabled on this renderer
+ Slang::List<Slang::String> requiredFeatures; ///< List of required feature names.
+ int nvapiExtnSlot = -1; ///< The slot (typically UAV) used to identify NVAPI intrinsics. If >=0 NVAPI is required.
};
+ // Will return with SLANG_E_NOT_AVAILABLE if NVAPI can't be initialized and nvapiExtnSlot >= 0
virtual SlangResult initialize(const Desc& desc, void* inWindowHandle) = 0;
bool hasFeature(const Slang::UnownedStringSlice& feature) { return getFeatures().indexOf(Slang::String(feature)) != Slang::Index(-1); }
diff --git a/tools/render-test/options.cpp b/tools/render-test/options.cpp
index fd4b75ed1..797439cfa 100644
--- a/tools/render-test/options.cpp
+++ b/tools/render-test/options.cpp
@@ -240,7 +240,7 @@ static SlangResult _setRendererType(RendererType type, const char* arg, Slang::W
{
outOptions.dontAddDefaultEntryPoints = true;
}
- else if (strcmp(arg, "-nvapi-register") == 0)
+ else if (strcmp(arg, "-nvapi-slot") == 0)
{
if (argCursor == argEnd)
{
@@ -248,7 +248,7 @@ static SlangResult _setRendererType(RendererType type, const char* arg, Slang::W
return SLANG_FAIL;
}
- outOptions.nvapiRegister = (*argCursor++);
+ outOptions.nvapiExtnSlot = (*argCursor++);
}
else
{
diff --git a/tools/render-test/options.h b/tools/render-test/options.h
index d311568d4..ddb903a4a 100644
--- a/tools/render-test/options.h
+++ b/tools/render-test/options.h
@@ -75,7 +75,7 @@ struct Options
uint32_t computeDispatchSize[3] = { 1, 1, 1 };
- Slang::String nvapiRegister; ///< The nvapiRegister to use.
+ Slang::String nvapiExtnSlot; ///< The NVAPI extension slot to use, as text (e.g. "u0").
static SlangResult parse(int argc, const char*const* argv, Slang::WriterHelper stdError, Options& outOptions);
};
diff --git a/tools/render-test/render-test-main.cpp b/tools/render-test/render-test-main.cpp
index e7356901d..25486c722 100644
--- a/tools/render-test/render-test-main.cpp
+++ b/tools/render-test/render-test-main.cpp
@@ -12,6 +12,7 @@
#include "shader-renderer-util.h"
#include "../source/core/slang-io.h"
+#include "../source/core/slang-string-util.h"
#include "core/slang-token-reader.h"
@@ -406,7 +407,7 @@ Result RenderTestApp::update(Window* window)
static SlangResult _setSessionPrelude(const Options& options, const char* exePath, SlangSession* session)
{
// Let's see if we need to set up special prelude for HLSL
- if (options.nvapiRegister.getLength())
+ if (options.nvapiExtnSlot.getLength())
{
String rootPath;
SLANG_RETURN_ON_FAIL(TestToolUtil::getRootPath(exePath, rootPath));
@@ -416,7 +417,7 @@ static SlangResult _setSessionPrelude(const Options& options, const char* exePat
StringBuilder buf;
// We have to choose a slot that NVAPI will use.
- buf << "#define NV_SHADER_EXTN_SLOT " << options.nvapiRegister << "\n";
+ buf << "#define NV_SHADER_EXTN_SLOT " << options.nvapiExtnSlot << "\n";
// Include the NVAPI header
buf << "#include \"" << includePath << "\"\n\n";
@@ -589,13 +590,18 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
}
}
+ Index nvapiExtnSlot = -1;
+
// Let's see if we need to set up special prelude for HLSL
- if (options.nvapiRegister.getLength())
+ if (options.nvapiExtnSlot.getLength() && options.nvapiExtnSlot[0] == 'u')
{
- // We require nvapi to be available on the device
- if (options.renderFeatures.indexOf("nvapi") < 0)
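+ // Parse the register index that follows the 'u' prefix (e.g. "u0" -> 0); if parsing fails the slot stays -1.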
+ Slang::Int value;
+ UnownedStringSlice slice = options.nvapiExtnSlot.getUnownedSlice();
+ UnownedStringSlice indexText(slice.begin() + 1 , slice.end());
+ if (SLANG_SUCCEEDED(StringUtil::parseInt(indexText, value)))
{
- options.renderFeatures.add("nvapi");
+ nvapiExtnSlot = Index(value);
}
}
@@ -716,16 +722,6 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
#endif
}
- if (options.nvapiRegister.getLength())
- {
- // We require nvapi to be available on the device
- if (options.renderFeatures.indexOf("nvapi") < 0)
- {
- options.renderFeatures.add("nvapi");
- }
- }
-
-
Slang::RefPtr<Renderer> renderer;
{
RendererUtil::CreateFunc createFunc = RendererUtil::getCreateFunc(options.rendererType);
@@ -748,6 +744,7 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
desc.height = gWindowHeight;
desc.adapter = options.adapter;
desc.requiredFeatures = options.renderFeatures;
+ desc.nvapiExtnSlot = int(nvapiExtnSlot);
window = renderer_test::Window::create();
SLANG_RETURN_ON_FAIL(window->initialize(gWindowWidth, gWindowHeight));
@@ -755,7 +752,9 @@ static SlangResult _innerMain(Slang::StdWriters* stdWriters, SlangSession* sessi
SlangResult res = renderer->initialize(desc, window->getHandle());
if (SLANG_FAILED(res))
{
- if (!options.onlyStartup)
+ // initialize() returns SLANG_E_NOT_AVAILABLE only when the specified features are not available.
+ // In that case no error is printed here; the result will cause the test to be ignored.
+ if (!options.onlyStartup && res != SLANG_E_NOT_AVAILABLE)
{
fprintf(stderr, "Unable to initialize renderer %s\n", rendererName.getBuffer());
}