From fcac02e405661de311b5ceebbd6d3e2c78bf8aea Mon Sep 17 00:00:00 2001
From: jsmall-nvidia <jsmall@nvidia.com>
Date: Fri, 21 Aug 2020 16:04:42 -0400
Subject: Vulkan update/NVAPI support (#1511)

* First pass at incorporating nvapi into test harness.

* D3d12 Atomic Float Add via NVAPI working

* Dx12 atomic float appears to work.

* Atomic float add on Dx12.

* Added atomic64 feature to vk.
Fixed the expected output for atomic-float-byte-address.slang

* Disable atomic float failing tests.

* Upgraded VK headers.

* Detect atomic float availability on VK.

* Try to get test working for int64 atomic.

* Made HLSL prelude controlled via the render-test requirements.

* Added -enable-nvapi to premake.

* Fix D3D12Renderer when NVAPI is not available.

* Small improvements to VKRenderer.

* Improve atomic documentation in target-compatibility.md.
---
 source/slang/hlsl.meta.slang | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'source/slang')

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index dc87eb6fd..8ec5c2c67 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -196,16 +196,20 @@ ${{{{
 
     // float32 and int64 atomic support. This is a Slang specific extension, it uses
     // GL_EXT_shader_atomic_float on vk
-    // NVAPI support on DX
-    // NOTE! To use this feature on HLSL, the shader needs to include 'nvHLSLExtns.h' from the NVAPI SDK
-    //
+    // NvAPI support on DX
+    // NOTE! To use this feature on HLSL, the shader needs to include 'nvHLSLExtns.h' from the NvAPI SDK
+    // 
+    // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#VK_EXT_shader_atomic_float
+    // https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_shader_atomic_float_add.html
 
     // Fp32 
 
     __target_intrinsic(hlsl, "($3 = NvInterlockedAddFp32($0, $1, $2))")
+    __cuda_sm_version(2.0)
     __target_intrinsic(cuda, "(*$3 = atomicAdd((float*)$0._getPtrAt($1), $2))")
     void InterlockedAddFp32(uint byteAddress, float valueToAdd, out float originalValue);
 
+    
     __specialized_for_target(glsl)
     void InterlockedAddFp32(uint byteAddress, float valueToAdd, out float originalValue)
     {
@@ -216,6 +220,7 @@ ${{{{
     // Without returning original value
 
     __target_intrinsic(hlsl, "(NvInterlockedAddFp32($0, $1, $2))")
+    __cuda_sm_version(2.0)
     __target_intrinsic(cuda, "atomicAdd((float*)$0._getPtrAt($1), $2)")
     void InterlockedAddFp32(uint byteAddress, float valueToAdd);
 
-- 
cgit v1.2.3