From af035fb6da2a19ccc647515e9b1edf35777f8c89 Mon Sep 17 00:00:00 2001
From: Yong He <yonghe@outlook.com>
Date: Mon, 5 Feb 2024 20:07:58 -0800
Subject: Add glsl implementation of Texture.InterlockedAddF32 (#3550)

---
 source/slang/hlsl.meta.slang | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'source')

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 29c9d4c51..1e0b95c81 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -2528,7 +2528,6 @@ ${{{{
     }
 
     // FP16x2
-    __cuda_sm_version(2.0)
     [__requiresNVAPI]
     uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value)
     {
@@ -2539,7 +2538,6 @@ ${{{{
         }
     }
 
-    __cuda_sm_version(2.0)
     [__requiresNVAPI]
     [ForceInline]
     void InterlockedAddF16(uint byteAddress, half value, out half originalValue)
@@ -2555,7 +2553,7 @@ ${{{{
             else
             {
                 byteAddress = byteAddress & ~3;
-                uint packedInput = asuint16(value) << 16;
+                uint packedInput = ((uint)asuint16(value)) << 16;
                 originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16));
             }
             return;
@@ -13014,6 +13012,7 @@ __generic<Shape:__ITextureShape1D2D3D, let format : int>
 extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite), 0, 0, format>
 {
     [__requiresNVAPI]
+    __glsl_extension(GL_EXT_shader_atomic_float)
     void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue)
     {
         __target_switch
@@ -13021,6 +13020,8 @@ extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite)
         case spirv:
             originalValue = __atomicAdd(this[coord], value);
             return;
+        case glsl:
+            __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
         case hlsl:
             __intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)";
         }
-- 
cgit v1.2.3