InterlockedExchangeU64 support on RWByteAddressBuffer (#1572)

* Using #include with an absolute path didn't work, because paths were always taken to be relative. * Added [__requiresNVAPI] to functions that need NVAPI support. * Added support for InterlockedExchangeU64. Added exchange-int64-byte-address-buffer test. Fixed typo in cas-int64-byte-address-buffer test. * Improve comment around NVAPI usage in hlsl.meta.slang.
author: jsmall-nvidia <jsmall@nvidia.com> 2020-10-06 13:30:55 -0400
committer: GitHub <noreply@github.com> 2020-10-06 13:30:55 -0400
commit: 8a70e20df6f47678c146eb29f89655aa734f97c7 (patch)
tree: 49aab68137de0001ae853b0deb1f03f533745f53 /source
parent: b6ad8dfb65358271f52ba76676db1926a90dcd46 (diff)
1 files changed, 34 insertions, 2 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index a4f551644..9893effea 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -149,6 +149,17 @@ __glsl_version(430)
 __glsl_extension(GL_EXT_shader_atomic_int64)
 uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value);
 
+// Exchange
+
+__target_intrinsic(hlsl, "NvInterlockedExchangeUint64($0, $1, $2)")
+[__requiresNVAPI]
+uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value);
+
+__target_intrinsic(glsl, "atomicExchange($0, $1)")
+__glsl_version(430)
+__glsl_extension(GL_EXT_shader_atomic_int64)
+uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value);
+
 // Conversion between uint64_t and uint2
 
 uint2 __asuint2(uint64_t i)
@@ -280,9 +291,15 @@ ${{{{
 }}}}
 
     // float32 and int64 atomic support. This is a Slang specific extension, it uses
-    // GL_EXT_shader_atomic_float on vk
+    // GL_EXT_shader_atomic_float on Vulkan
     // NvAPI support on DX
-    // NOTE! To use this feature on HLSL, the shader needs to include 'nvHLSLExtns.h' from the NvAPI SDK
+    // NOTE! To use this feature on HLSL based targets the path to 'nvHLSLExtns.h' from the NvAPI SDK must
+    // be set. Note that this include will be added to the *output* that is passed to a downstream compiler.
+    // Also note that you *can* include NVAPI headers in your Slang source, and directly use NVAPI functions.
+    // Directly using NVAPI functions does *not* add the #include to the output.
+    // Finally note you can *mix* NVAPI direct calls, and use of NVAPI intrinsics below. This doesn't cause
+    // any clashes, as Slang will emit any NVAPI function it parsed (say via an include in Slang source) with
+    // unique functions.
     // 
     // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#VK_EXT_shader_atomic_float
     // https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_shader_atomic_float_add.html
@@ -448,6 +465,21 @@ ${{{{
         return __atomicXor(buf[byteAddress / 8], value);
     }
 
+    // Exchange
+
+    __target_intrinsic(cuda, "atomicExch((uint64_t*)$0._getPtrAt($1), $2)")
+    uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value);
+
+    __specialized_for_target(hlsl)
+    uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); }
+
+    __specialized_for_target(glsl)
+    uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value)
+    {
+        RWStructuredBuffer<uint64_t> buf = __getEquivalentStructuredBuffer<uint64_t>(this);
+        return __atomicExchange(buf[byteAddress / 8], value);
+    }
+
 ${{{{
     }
 }}}}
author	jsmall-nvidia <jsmall@nvidia.com>	2020-10-06 13:30:55 -0400
committer	GitHub <noreply@github.com>	2020-10-06 13:30:55 -0400
commit	8a70e20df6f47678c146eb29f89655aa734f97c7 (patch)
tree	49aab68137de0001ae853b0deb1f03f533745f53 /source
parent	b6ad8dfb65358271f52ba76676db1926a90dcd46 (diff)