diff options
4 files changed, 75 insertions, 3 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index a4f551644..9893effea 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -149,6 +149,17 @@ __glsl_version(430) __glsl_extension(GL_EXT_shader_atomic_int64) uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value); +// Exchange + +__target_intrinsic(hlsl, "NvInterlockedExchangeUint64($0, $1, $2)") +[__requiresNVAPI] +uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value); + +__target_intrinsic(glsl, "atomicExchange($0, $1)") +__glsl_version(430) +__glsl_extension(GL_EXT_shader_atomic_int64) +uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value); + // Conversion between uint64_t and uint2 uint2 __asuint2(uint64_t i) @@ -280,9 +291,15 @@ ${{{{ }}}} // float32 and int64 atomic support. This is a Slang specific extension, it uses - // GL_EXT_shader_atomic_float on vk + // GL_EXT_shader_atomic_float on Vulkan // NvAPI support on DX - // NOTE! To use this feature on HLSL, the shader needs to include 'nvHLSLExtns.h' from the NvAPI SDK + // NOTE! To use this feature on HLSL based targets the path to 'nvHLSLExtns.h' from the NvAPI SDK must + // be set. Note that this include will be added to the *output* that is passed to a downstream compiler. + // Also note that you *can* include NVAPI headers in your Slang source, and directly use NVAPI functions + // Directly using NVAPI functions does *not* add the #include on the output + // Finally note you can *mix* NVAPI direct calls, and use of NVAPI intrinsics below. This doesn't cause + // any clashes, as Slang will emit any NVAPI function it parsed (say via an include in Slang source) with + // unique functions. 
// // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#VK_EXT_shader_atomic_float // https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_shader_atomic_float_add.html @@ -448,6 +465,21 @@ ${{{{ return __atomicXor(buf[byteAddress / 8], value); } + // Exchange + + __target_intrinsic(cuda, "atomicExch((uint64_t*)$0._getPtrAt($1), $2)") + uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value); + + __specialized_for_target(hlsl) + uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); } + + __specialized_for_target(glsl) + uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) + { + RWStructuredBuffer<uint64_t> buf = __getEquivalentStructuredBuffer<uint64_t>(this); + return __atomicExchange(buf[byteAddress / 8], value); + } + ${{{{ } }}}} diff --git a/tests/slang-extension/cas-int64-byte-address-buffer.slang b/tests/slang-extension/cas-int64-byte-address-buffer.slang index b75a9fa04..451d97e36 100644 --- a/tests/slang-extension/cas-int64-byte-address-buffer.slang +++ b/tests/slang-extension/cas-int64-byte-address-buffer.slang @@ -24,7 +24,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) int idx = (tid & 3) ^ (tid >> 2); // Try directly reading - uint2 currentValue2 = outputBuffer.Load2(idx << 8); + uint2 currentValue2 = outputBuffer.Load2(idx << 3); uint64_t currentValue = uint64_t(currentValue2.y) | currentValue2.x; while (true) diff --git a/tests/slang-extension/exchange-int64-byte-address-buffer.slang b/tests/slang-extension/exchange-int64-byte-address-buffer.slang new file mode 100644 index 000000000..0145d3838 --- /dev/null +++ b/tests/slang-extension/exchange-int64-byte-address-buffer.slang @@ -0,0 +1,32 @@ +// No atomic support on CPU +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute +// No support for int64_t on DX11 
+//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute +// No support for int64_t on fxc - we need SM6.0 and dxil +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 +//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 +//TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute + +// The test doesn't directly use this, but having this defined makes the 0 slot available if NVAPI is going to be used +// Only strictly necessary on the D3D12 path +//TEST_INPUT:ubuffer(data=[0 0 0 0 ], stride=4):name=nvapiBuffer +RWStructuredBuffer<int> nvapiBuffer; + +//TEST_INPUT:ubuffer(data=[0 1 2 3 4 5 6 7]):out,name=outputBuffer +RWByteAddressBuffer outputBuffer; + +// With only 4 threads there is no contention - which makes for a simple test +// but doesn't actually test for the exchange atomicity +[numthreads(4, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + int idx = dispatchThreadID.x; + + // Try directly reading + uint2 currentValue2 = outputBuffer.Load2(idx << 3); + uint64_t currentValue = uint64_t(currentValue2.y) | currentValue2.x; + + uint64_t readValue = outputBuffer.InterlockedExchangeU64(idx << 3, currentValue + 1); +}
\ No newline at end of file diff --git a/tests/slang-extension/exchange-int64-byte-address-buffer.slang.expected.txt b/tests/slang-extension/exchange-int64-byte-address-buffer.slang.expected.txt new file mode 100644 index 000000000..4346d293e --- /dev/null +++ b/tests/slang-extension/exchange-int64-byte-address-buffer.slang.expected.txt @@ -0,0 +1,8 @@ +2 +0 +4 +0 +6 +0 +8 +0 |
