diff options
| -rw-r--r-- | source/slang/hlsl.meta.slang | 7 | ||||
| -rw-r--r-- | tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang | 4 |
2 files changed, 6 insertions, 5 deletions
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 29c9d4c51..1e0b95c81 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -2528,7 +2528,6 @@ ${{{{ } // FP16x2 - __cuda_sm_version(2.0) [__requiresNVAPI] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { @@ -2539,7 +2538,6 @@ ${{{{ } } - __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) @@ -2555,7 +2553,7 @@ ${{{{ else { byteAddress = byteAddress & ~3; - uint packedInput = asuint16(value) << 16; + uint packedInput = ((uint)asuint16(value)) << 16; originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; @@ -13014,6 +13012,7 @@ __generic<Shape:__ITextureShape1D2D3D, let format : int> extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite), 0, 0, format> { [__requiresNVAPI] + __glsl_extension(GL_EXT_shader_atomic_float) void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue) { __target_switch @@ -13021,6 +13020,8 @@ extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite) case spirv: originalValue = __atomicAdd(this[coord], value); return; + case glsl: + __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)"; case hlsl: __intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)"; } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang index e8cd266d3..f53d38d74 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang @@ -4,7 +4,7 @@ // Disabled because validation layer doesn't like vector atomics, although nv driver does allow it. //DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type //TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation - +//TEST:SIMPLE(filecheck=HLSL):-target hlsl -entry computeMain -profile cs_6_3 //TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4) RWByteAddressBuffer tmpBuffer; @@ -17,7 +17,7 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID) half originalValue; // SPIRV: OpAtomicFAddEXT - + // HLSL: NvInterlockedAddFp16x2 tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); tmpBuffer.InterlockedAddF16(2, 2.0h, originalValue); |
