diff options
| author | Yong He <yonghe@outlook.com> | 2024-10-17 20:14:22 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-17 20:14:22 -0700 |
| commit | a618b8c5e249b0f20e6c0c95f9da1b5cbfdbf08b (patch) | |
| tree | d583c373d574a265fefe7f288a96c4b382e259b8 | |
| parent | 11e1ecafa09396a3559fe245d729b40ce4f25d52 (diff) | |
Cleanup atomic intrinsics. (#5324)
* Cleanup atomic intrinsics.
* Fix.
* Fix glsl.
* Remove hacky intrinsic expansion logic for glsl image atomics.
* Fix all tests.
* Fix.
* Add `InterlockedAddF16Emulated`.
* Fix glsl intrinsic.
* Fix.
33 files changed, 1040 insertions, 2564 deletions
diff --git a/prelude/slang-cuda-prelude.h b/prelude/slang-cuda-prelude.h index 96ef22dd1..a6c8fd17b 100644 --- a/prelude/slang-cuda-prelude.h +++ b/prelude/slang-cuda-prelude.h @@ -1261,7 +1261,14 @@ struct ByteAddressBuffer memcpy(&data, ((const char*)this->data) + index, sizeof(T)); return data; } - + template<typename T> + SLANG_CUDA_CALL StructuredBuffer<T> asStructuredBuffer() const + { + StructuredBuffer<T> rs; + rs.data = (T*)data; + rs.count = sizeInBytes / sizeof(T); + return rs; + } const uint32_t* data; size_t sizeInBytes; //< Must be multiple of 4 }; @@ -1348,7 +1355,14 @@ struct RWByteAddressBuffer SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); return (T*)(((char*)data) + index); } - + template<typename T> + SLANG_CUDA_CALL RWStructuredBuffer<T> asStructuredBuffer() const + { + RWStructuredBuffer<T> rs; + rs.data = (T*)data; + rs.count = sizeInBytes / sizeof(T); + return rs; + } uint32_t* data; size_t sizeInBytes; //< Must be multiple of 4 }; diff --git a/source/slang/core.meta.slang b/source/slang/core.meta.slang index 084654d0f..67ec91cf6 100644 --- a/source/slang/core.meta.slang +++ b/source/slang/core.meta.slang @@ -299,6 +299,18 @@ interface __BuiltinSignedArithmeticType : __BuiltinArithmeticType {} interface __BuiltinIntegerType : __BuiltinArithmeticType, IInteger {} +/// Represent a `int` or `uint` type. +[sealed] +[builtin] +interface __BuiltinInt32Type : __BuiltinIntegerType +{} + +/// Represent a `int64_t` or `uint64_t` type. +[sealed] +[builtin] +interface __BuiltinInt64Type : __BuiltinIntegerType +{} + /// Represent builtin types that can represent a real number. 
[sealed] [builtin] @@ -603,6 +615,14 @@ ${{{{ , __BuiltinArithmeticType , __BuiltinIntegerType ${{{{ + if (kBaseTypes[tt].tag == BaseType::Int || kBaseTypes[tt].tag == BaseType::UInt) +}}}} + , __BuiltinInt32Type +${{{{ + if (kBaseTypes[tt].tag == BaseType::Int64 || kBaseTypes[tt].tag == BaseType::UInt64) +}}}} + , __BuiltinInt64Type +${{{{ ; // fall through case BaseType::Bool: }}}} diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 191fa3195..1c01c2f6b 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -3923,475 +3923,36 @@ ${{{{ } }}}} -// AtomicAdd -// Make the GLSL atomicAdd available. -// We have separate int/float implementations, as the float version requires some specific extensions -// https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_atomic_float.txt - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float) -[ForceInline] -[require(glsl_spirv, atomic_glsl_float1)] -float __atomicAdd(__ref float value, float amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_add"; - OpCapability AtomicFloat32AddEXT; - result:$$float = OpAtomicFAddEXT &value Device None $amount - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_NV_shader_atomic_fp16_vector) -[ForceInline] -[require(glsl_spirv, atomic_glsl_halfvec)] -half2 __atomicAdd(__ref half2 value, half2 amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_add"; - OpCapability AtomicFloat32AddEXT; - result:$$half2 = OpAtomicFAddEXT &value Device None $amount - }; - } -} - -// Helper for hlsl, using NVAPI -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicAdd(RWByteAddressBuffer buf, uint offset, uint2) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedAddUint64($0, $1, $2)"; - } 
-} - -// atomic add for hlsl using SM6.6 -[require(hlsl, atomic_hlsl_sm_6_6)] -void __atomicAdd(RWByteAddressBuffer buf, uint offset, int64_t value, out int64_t originalValue) -{ - __target_switch - { - case hlsl: __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)"; - } -} - -[require(hlsl, atomic_hlsl_sm_6_6)] -void __atomicAdd(RWByteAddressBuffer buf, uint offset, uint64_t value, out uint64_t originalValue) -{ - __target_switch - { - case hlsl: __intrinsic_asm "$0.InterlockedAdd64($1, $2, $3)"; - } -} - -// Int versions require glsl 4.30 -// https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/atomicAdd.xhtml - -__glsl_version(430) -[ForceInline] -[require(glsl_spirv, atomic_glsl)] -int __atomicAdd(__ref int value, int amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - result:$$int = OpAtomicIAdd &value Device None $amount; - }; - } -} - -__glsl_version(430) -[ForceInline] -[require(glsl_spirv, atomic_glsl)] -uint __atomicAdd(__ref uint value, uint amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - result:$$uint = OpAtomicIAdd &value Device None $amount; - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -int64_t __atomicAdd(__ref int64_t value, int64_t amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$int64_t = OpAtomicIAdd &value Device None $amount - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicAdd(__ref uint64_t value, uint64_t amount) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicIAdd &value 
Device None $amount - }; - } -} - -// Cas - Compare and swap - -// Helper for HLSL, using NVAPI - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __cas(RWByteAddressBuffer buf, uint offset, uint2 compareValue, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedCompareExchangeUint64($0, $1, $2, $3)"; - } -} - -// CAS using SM6.6 -[require(hlsl, atomic_hlsl_sm_6_6)] -void __cas(RWByteAddressBuffer buf, uint offset, in int64_t compare_value, in int64_t value, out int64_t original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)"; - } -} - -[require(hlsl, atomic_hlsl_sm_6_6)] -void __cas(RWByteAddressBuffer buf, uint offset, in uint64_t compare_value, in uint64_t value, out uint64_t original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "$0.InterlockedCompareExchange64($1, $2, $3, $4)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -int64_t __cas(__ref int64_t ioValue, int64_t compareValue, int64_t newValue) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$int64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __cas(__ref uint64_t ioValue, uint64_t compareValue, uint64_t newValue) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicCompSwap($0, $1, $2)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicCompareExchange &ioValue Device None None $newValue $compareValue - }; - } -} - -// Max - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicMax(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - 
__target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedMaxUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicMax(__ref uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMax($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicUMax &ioValue Device None $value - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float2) -[ForceInline] -[require(glsl_spirv, atomic_glsl_float2)] -float __atomicMax(__ref float ioValue, float value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMax($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_min_max"; - OpCapability AtomicFloat32MinMaxEXT; - result:$$float = OpAtomicFMaxEXT &ioValue Device None $value - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float2) -[ForceInline] -[require(glsl_spirv, atomic_glsl_float2)] -half __atomicMax(__ref half ioValue, half value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMax($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_min_max"; - OpCapability AtomicFloat16MinMaxEXT; - result:$$half = OpAtomicFMaxEXT &ioValue Device None $value - }; - } -} - -// Min - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicMin(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedMinUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicMin(__ref uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMin($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t 
= OpAtomicUMin &ioValue Device None $value - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float2) -[ForceInline] -[require(glsl_spirv, atomic_glsl_float2)] -float __atomicMin(__ref float ioValue, float value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMin($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_min_max"; - OpCapability AtomicFloat32MinMaxEXT; - result:$$float = OpAtomicFMinEXT &ioValue Device None $value - }; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_float2) -[ForceInline] -[require(glsl_spirv, atomic_glsl_float2)] -half __atomicMin(__ref half ioValue, half value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicMin($0, $1)"; - case spirv: - return spirv_asm - { - OpExtension "SPV_EXT_shader_atomic_float_min_max"; - OpCapability AtomicFloat16MinMaxEXT; - result:$$half = OpAtomicFMinEXT &ioValue Device None $value - }; - } -} - -// And - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicAnd(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedAndUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicAnd(__ref uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicAnd($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicAnd &ioValue Device None $value - }; - } -} - -// Or - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicOr(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedOrUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicOr(__ref 
uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicOr($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicOr &ioValue Device None $value - }; - } -} - -// Xor - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicXor(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedXorUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicXor(__ref uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicXor($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicXor &ioValue Device None $value - }; - } -} - -// Exchange - -[__requiresNVAPI] -[require(hlsl, atomic_hlsl_nvapi)] -uint2 __atomicExchange(RWByteAddressBuffer buf, uint offset, uint2 value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "NvInterlockedExchangeUint64($0, $1, $2)"; - } -} - -__glsl_version(430) -__glsl_extension(GL_EXT_shader_atomic_int64) -[ForceInline] -[require(glsl_spirv, atomic_glsl_int64)] -uint64_t __atomicExchange(__ref uint64_t ioValue, uint64_t value) -{ - __target_switch - { - case glsl: __intrinsic_asm "atomicExchange($0, $1)"; - case spirv: - return spirv_asm - { - OpCapability Int64Atomics; - result:$$uint64_t = OpAtomicExchange &ioValue Device None $value - }; - } -} +// Atomic intrinsic insts. 
+ +__intrinsic_op($(kIROp_AtomicExchange)) +T __atomic_exchange<T>(__ref T val, T newValue, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicCompareExchange)) +T __atomic_compare_exchange<T>( + __ref T val, + T compareValue, + T newValue, + MemoryOrder successOrder = MemoryOrder.Relaxed, + MemoryOrder failOrder = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicAdd)) +T __atomic_add<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicSub)) +T __atomic_sub<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicMax)) +T __atomic_max<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicMin)) +T __atomic_min<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicAnd)) +T __atomic_and<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicOr)) +T __atomic_or<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicXor)) +T __atomic_xor<T>(__ref T val, T value, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicInc)) +T __atomic_increment<T>(__ref T val, MemoryOrder order = MemoryOrder.Relaxed); +__intrinsic_op($(kIROp_AtomicDec)) +T __atomic_decrement<T>(__ref T val, MemoryOrder order = MemoryOrder.Relaxed); // Conversion between uint64_t and uint2 @@ -4802,6 +4363,20 @@ struct $(item.name) } ${{{{ + struct BufferAtomicOps + { + const char* name; + const char* internalName; + }; + const BufferAtomicOps bufferAtomicOps[] = { + {"Max", "max"}, + {"Min", "min"}, + {"Add", "add"}, + {"And", "and"}, + {"Or", "or"}, + {"Xor", "xor"}, + {"Exchange", "exchange"} + }; if (item.op == kIROp_HLSLRWByteAddressBufferType) { }}}} @@ -4822,6 +4397,13 @@ ${{{{ // F32 Add + /// Perform a 32-bit floating point atomic add operation at `byteAddress`. 
+ /// @param byteAddress The address at which to perform the atomic add operation. + /// @param valueToAdd The value to add to the value at `byteAddress`. + /// @param originalValue The original value at `byteAddress` before the add operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd`. For HLSL, this function translates to an NVAPI call + /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function + /// maps to `atomicAdd`. __cuda_sm_version(2.0) [__requiresNVAPI] [ForceInline] @@ -4832,35 +4414,45 @@ ${{{{ { case hlsl: __intrinsic_asm "($3 = NvInterlockedAddFp32($0, $1, $2))"; case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<float>($1), $2))"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<float>(this); - __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd, originalValue); - return; - } - case glsl: - case spirv: + default: { let buf = __getEquivalentStructuredBuffer<float>(this); - originalValue = __atomicAdd(buf[byteAddress / 4], valueToAdd); + originalValue = __atomic_add(buf[byteAddress / 4], valueToAdd); return; } } } // FP16x2 + + /// @internal + /// Maps to the `NvInterlockedAddFp16x2` NVAPI function. + /// [__requiresNVAPI] [ForceInline] - [require(hlsl, atomic_hlsl_nvapi)] + [require(cuda_hlsl_spirv)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { __target_switch { case hlsl: __intrinsic_asm "NvInterlockedAddFp16x2($0, $1, $2)"; + default: + let buf = __getEquivalentStructuredBuffer<half2>(this); + return bit_cast<uint>(__atomic_add(buf[byteAddress / 4], bit_cast<half2>(fp16x2Value))); } } + + /// Perform a 16-bit floating point atomic add operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic add operation. + /// @param valueToAdd The value to add to the value at `byteAddress`. + /// @param originalValue The original value at `byteAddress` before the add operation. 
+ /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` and requires `SPV_EXT_shader_atomic_float16_add` extension. + /// + /// For HLSL, this function translates to an NVAPI call + /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function + /// maps to `atomicAdd`. [__requiresNVAPI] [ForceInline] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) @@ -4880,17 +4472,55 @@ ${{{{ originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; - case glsl: - case spirv: + default: + { + let buf = __getEquivalentStructuredBuffer<half>(this); + originalValue = __atomic_add(buf[byteAddress/2], value); + return; + } + } + } + + /// Perform a 16-bit floating point atomic add operation at `byteAddress` through emulation using `half2` atomics. + /// @param byteAddress The address at which to perform the atomic add operation. + /// @param valueToAdd The value to add to the value at `byteAddress`. + /// @param originalValue The original value at `byteAddress` before the add operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half2` vector with the correct part set to `value` + /// and the remaining part set to 0. This requires the `AtomicFloat16VectorNV` capability introduced by the `SPV_NV_shader_atomic_fp16_vector` + /// extension. + /// + /// For HLSL, this function translates to an equivalent NVAPI call + /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function + /// maps to `atomicAdd`. 
+ [__requiresNVAPI] + [ForceInline] + void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue) + { + __target_switch + { + case hlsl: + if ((byteAddress & 2) == 0) + { + uint packedInput = asuint16(value); + originalValue = asfloat16((uint16_t)_NvInterlockedAddFp16x2(byteAddress, packedInput)); + } + else + { + byteAddress = byteAddress & ~3; + uint packedInput = ((uint)asuint16(value)) << 16; + originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); + } + return; + default: { let buf = __getEquivalentStructuredBuffer<half2>(this); if ((byteAddress & 2) == 0) { - originalValue = __atomicAdd(buf[byteAddress/4], half2(value, half(0.0))).x; + originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; } else { - originalValue = __atomicAdd(buf[byteAddress/4], half2(half(0.0), value)).y; + originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; } return; } @@ -4908,484 +4538,207 @@ ${{{{ __target_switch { case hlsl: __intrinsic_asm "(NvInterlockedAddFp32($0, $1, $2))"; - case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<float>($1), $2)"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<float>(this); - __metalInterlocked_add(__getMetalAtomicRef(buf[byteAddress / 4]), valueToAdd); - return; - } - case glsl: - case spirv: + default: { let buf = __getEquivalentStructuredBuffer<float>(this); - __atomicAdd(buf[byteAddress / 4], valueToAdd); + __atomic_add(buf[byteAddress / 4], valueToAdd); return; } } } // Int64 Add + + /// Perform a 64-bit integer atomic add operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic add operation. + /// @param valueToAdd The value to add to the value at `byteAddress`. + /// @param originalValue The original value at `byteAddress` before the add operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicAdd`. 
For HLSL, this function + /// translates to `InterlockedAdd64` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicAdd`. [ForceInline] - __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue) { - __target_switch - { - case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint64_t>($1), $2))"; - case hlsl: - originalValue = __asuint64(__atomicAdd(this, byteAddress, __asuint2(valueToAdd))); - case glsl: - case spirv: - { - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - originalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); - } - } + InterlockedAdd64(byteAddress, valueToAdd, originalValue); } // Without returning original value - __cuda_sm_version(6.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda6_int64)] + [ForceInline] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedAddI64(uint byteAddress, int64_t valueToAdd) { - __target_switch - { - case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - __atomicAdd(this, byteAddress, __asuint2(valueToAdd)); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - __atomicAdd(buf[byteAddress / 8], valueToAdd); - } + InterlockedAdd64(byteAddress, valueToAdd); } // Cas uint64_t - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] + /// Perform a 64-bit integer atomic compare-and-exchange operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic compare-and-exchange operation. + /// @param compareValue The value to compare to the value at `byteAddress`. + /// @param value The value to store at `byteAddress` if the comparison is successful. + /// @param originalValue The original value at `byteAddress` before the add operation. 
+ /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareExchange64` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicCAS`. + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareExchangeU64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) { __target_switch { case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint64_t>($1), $2, $3))"; case hlsl: - outOriginalValue = __asuint64(__cas(this, byteAddress, __asuint2(compareValue), __asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); - } - } - - // Max - - __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] - uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) - { - __target_switch - { - case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicMax(buf[byteAddress / 8], value); - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMax64(uint byteAddress, int64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMax64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMax64(uint byteAddress, int64_t value, out int64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMax64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMax64(uint byteAddress, uint64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMax64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void 
InterlockedMax64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMax64"; - } - } - - // Min - - __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] - uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) - { - __target_switch - { - case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); - case glsl: - case spirv: + __intrinsic_asm ".InterlockedCompareExchange64"; + default: let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicMin(buf[byteAddress / 8], value); - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMin64(uint byteAddress, int64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMin64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMin64(uint byteAddress, int64_t value, out int64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMin64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMin64(uint byteAddress, uint64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMin64"; + outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value); } } - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedMin64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedMin64"; - } - } - - // And - - __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] - uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) - { - __target_switch - { - case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - return __asuint64(__atomicAnd(this, byteAddress, 
__asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicAnd(buf[byteAddress / 8], value); - } - } + // SM6.6 6 64bit atomics. + // InterlockedMax64, InterlockedMin64, InterlockedAdd64, InterlockedAnd64, InterlockedOr64, InterlockedXor64, InterlockedExchange64 +${{{{ + for (auto op : bufferAtomicOps) { +}}}} [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedAnd64(uint byteAddress, uint64_t value) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + uint64_t Interlocked$(op.name)U64(uint byteAddress, uint64_t value) { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAnd64"; - } + uint64_t originalValue; + Interlocked$(op.name)64(byteAddress, value, originalValue); + return originalValue; } [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedAnd64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + void Interlocked$(op.name)64(uint byteAddress, int64_t value) { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAnd64"; - } - } - - // Or - - __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] - uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) - { - __target_switch - { - case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicOr(buf[byteAddress / 8], value); - } + int64_t oldValue; + Interlocked$(op.name)64(byteAddress, value, oldValue); } + /// Perform a 64-bit integer atomic $(op.internalName) operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic $(op.internalName) operation. + /// @param value The operand for the $(op.internalName) operation. 
+ /// @param originalValue The original value at `byteAddress` before the $(op.internalName) operation. [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedOr64(uint byteAddress, uint64_t value) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + void Interlocked$(op.name)64<T:__BuiltinInt64Type>(uint byteAddress, T value, out T outOriginalValue) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedOr64"; + case hlsl: __intrinsic_asm ".Interlocked$(op.name)64"; + default: + let buf = __getEquivalentStructuredBuffer<T>(this); + outOriginalValue = __atomic_$(op.internalName)(buf[byteAddress / 8], value); + return; } } +${{{{ +} // for (each bufferOps) +}}}} + /// Perform a 64-bit integer atomic compare-and-exchange operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic compare-and-exchange operation. + /// @param compareValue The value to compare to the value at `byteAddress`. + /// @param value The value to store at `byteAddress` if the comparison is successful. + /// @param outOriginalValue The original value at `byteAddress` before the add operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareExchange64` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicCAS`. 
[ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedOr64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + void InterlockedCompareExchange64<T:__BuiltinInt64Type>(uint byteAddress, T compareValue, T value, out T outOriginalValue) { __target_switch { - case hlsl: __intrinsic_asm ".InterlockedOr64"; - } - } - - // Xor - - __cuda_sm_version(5.0) - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda5_int64)] - uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) - { - __target_switch - { - case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint64_t>($1), $2)"; case hlsl: - return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicXor(buf[byteAddress / 8], value); - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedXor64(uint byteAddress, uint64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedXor64"; + __intrinsic_asm ".InterlockedCompareExchange64"; + default: + let buf = __getEquivalentStructuredBuffer<T>(this); + outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 8], compareValue, value); + return; } } [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedXor64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedXor64"; - } - } - - // Exchange - - [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_nvapi_cuda9_int64)] - uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) { __target_switch { - case cuda: __intrinsic_asm "atomicExch($0._getPtrAt<uint64_t>($1), $2)"; - case hlsl: - 
return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - return __atomicExchange(buf[byteAddress / 8], value); + case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; + default: + let buf = __getEquivalentStructuredBuffer<float>(this); + outOriginalValue = __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value); + return; } } + /// Perform a floating-point atomic bitwise exchange operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic exchange operation. + /// @param value The value to store at `byteAddress`. + /// @param [out] outOriginalValue The original value at `byteAddress` before the exchange operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicExchange`. For HLSL, this function + /// translates to `InterlockedExchangeFloat` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicExch`. [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedExchangeFloat(uint byteAddress, float value, out float outOriginalValue) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedExchangeFloat"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedExchange64(uint byteAddress, int64_t value, out int64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedExchange64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedExchange64(uint byteAddress, uint64_t value, out uint64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedExchange64"; - } - } - - // SM6.6 6 64bit atomics. 
- [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedAdd64(uint byteAddress, int64_t valueToAdd) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAdd64"; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - __atomicAdd(buf[byteAddress / 8], valueToAdd); - } - } - - [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedAdd64(uint byteAddress, int64_t valueToAdd, out int64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAdd64"; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); - return; - } - } - - [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAdd64"; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - __atomicAdd(buf[byteAddress / 8], valueToAdd); - } - } - - [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedAdd64(uint byteAddress, uint64_t valueToAdd, out uint64_t outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedAdd64"; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - outOriginalValue = __atomicAdd(buf[byteAddress / 8], valueToAdd); + default: + let buf = __getEquivalentStructuredBuffer<float>(this); + outOriginalValue = __atomic_exchange(buf[byteAddress / 4], value); return; } } + /// Perform a 64-bit integer atomic compare-and-store operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic store operation. + /// @param compareValue The value to compare to the value at `byteAddress`. 
+ /// @param value The value to store at `byteAddress` if the value at `byteAddress` is equal to `compareValue`. + /// @param [out] outOriginalValue The original value at `byteAddress` before the store operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareStore64` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicCAS`. [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedCompareExchange64(uint byteAddress, int64_t compareValue, int64_t value, out int64_t outOriginalValue) - { - __target_switch - { - case hlsl: - __cas(this, byteAddress, compareValue, value, outOriginalValue); - return; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<int64_t>(this); - outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); - return; - } - } - [ForceInline] - [require(glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] - void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue) + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] + void InterlockedCompareStore64<T:__BuiltinInt64Type>(uint byteAddress, T compareValue, T value) { __target_switch { - case hlsl: - __cas(this, byteAddress, compareValue, value, outOriginalValue); - return; - case glsl: - case spirv: - let buf = __getEquivalentStructuredBuffer<uint64_t>(this); - outOriginalValue = __cas(buf[byteAddress / 8], compareValue, value); + case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; + default: + let buf = __getEquivalentStructuredBuffer<T>(this); + __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value); return; } } - + + /// Perform a floating-point atomic bitwise compare-and-store operation at `byteAddress`. + /// @param byteAddress The address at which to perform the atomic compare-and-exchange operation. 
+ /// @param compareValue The value to perform bitwise comparison to the value at `byteAddress`. + /// @param value The value to store at `byteAddress` if the comparison is successful. + /// @param [out] outOriginalValue The original value at `byteAddress` before the compare-and-exchange operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareStoreFloatBitwise` and requires shader model 6.6. + /// For CUDA, this function maps to `atomicCAS`. [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] + [require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda9_int64)] void InterlockedCompareStoreFloatBitwise(uint byteAddress, float compareValue, float value) { __target_switch { case hlsl: __intrinsic_asm ".InterlockedCompareStoreFloatBitwise"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedCompareExchangeFloatBitwise(uint byteAddress, float compareValue, float value, out float outOriginalValue) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedCompareExchangeFloatBitwise"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedCompareStore64(uint byteAddress, int64_t compareValue, int64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; - } - } - - [ForceInline] - [require(hlsl, atomic_hlsl_sm_6_6)] - void InterlockedCompareStore64(uint byteAddress, uint64_t compareValue, uint64_t value) - { - __target_switch - { - case hlsl: __intrinsic_asm ".InterlockedCompareStore64"; + default: + let buf = __getEquivalentStructuredBuffer<float>(this); + __atomic_compare_exchange(buf[byteAddress / 4], compareValue, value); + return; } } @@ -5393,103 +4746,62 @@ ${{{{ } // endif (type == RWByteAddressBuffer) }}}} - // Added operations: - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedAdd( - UINT dest, - UINT value, - out UINT 
original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicAdd($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedAdd"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedAdd(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedAdd( - UINT dest, - UINT value) - { - __target_switch - { - case glsl: __intrinsic_asm "atomicAdd($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicAdd($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedAdd"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_add(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedAdd(buf[dest / 4], value); - } - } + // 32-bit atomic operations: + // InterlockedMax, InterlockedMin, InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor, InterlockedExchange +${{{{ + for (auto op : bufferAtomicOps) { +}}}} + /// Perform an atomic $(op.internalName) operation at the specified byte + /// location of the byte address buffer. + /// @param dest The byte address at which to perform the atomic $(op.internalName) operation. + /// @param value The operand of the atomic operation. + /// @param original_value The original value at `dest` before the $(op.internalName) operation. 
[ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedAnd( + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)] + void Interlocked$(op.name)( UINT dest, UINT value, out UINT original_value) { __target_switch { - case glsl: __intrinsic_asm "$3 = atomicAnd($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "(*$3 = atomicAnd($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedAnd"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: + case hlsl: __intrinsic_asm ".Interlocked$(op.name)"; + default: let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedAnd(buf[dest / 4], value, original_value); + ::Interlocked$(op.name)(buf[dest / 4], value, original_value); } } [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedAnd( + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)] + void Interlocked$(op.name)( UINT dest, UINT value) { __target_switch { - case glsl: __intrinsic_asm "atomicAnd($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicAnd($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedAnd"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_and(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: + case hlsl: __intrinsic_asm ".Interlocked$(op.name)"; + default: let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedAnd(buf[dest / 4], value); + ::Interlocked$(op.name)(buf[dest / 4], value); } } +${{{{ +} // for (buffer atomic ops) +}}}} + /// Perform a 32-bit integer atomic compare-and-exchange operation at + /// the specified byte address within the `RWByteAddressBuffer`. 
+ /// @param dest The address at which to perform the atomic compare-and-exchange operation. + /// @param compare_value The value to compare bitwise against the value at `dest`. + /// @param value The value to store at `dest` if the comparison is successful. + /// @param original_value The original value at `dest` before the compare-and-exchange operation. + /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareExchange`. + /// For CUDA, this function maps to `atomicCAS`. [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)] void InterlockedCompareExchange( UINT dest, UINT compare_value, @@ -5498,23 +4810,23 @@ ${{{{ { __target_switch { - case glsl: __intrinsic_asm "($4 = atomicCompSwap($0._data[$1/4], $2, $3))"; - case cuda: __intrinsic_asm "(*$4 = atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3))"; case hlsl: __intrinsic_asm ".InterlockedCompareExchange"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value, original_value); - return; - } - case spirv: + default: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedCompareExchange(buf[dest / 4], compare_value, value, original_value); } } + /// Perform a 32-bit integer atomic compare-and-store operation at + /// the specified byte address within the `RWByteAddressBuffer`. + /// @param dest The address at which to perform the atomic compare-and-store operation. + /// @param compare_value The value to compare against the value at `dest`. + /// @param value The value to store at `dest` if the comparison is successful. + /// @remarks For SPIR-V, this function maps to `OpAtomicCompareExchange`. For HLSL, this function + /// translates to `InterlockedCompareStore`. 
+ /// For CUDA, this function maps to `atomicCAS`. [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] + [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal, byteaddressbuffer_rw)] void InterlockedCompareStore( UINT dest, UINT compare_value, @@ -5522,232 +4834,13 @@ ${{{{ { __target_switch { - case glsl: __intrinsic_asm "atomicCompSwap($0._data[$1/4], $2, $3)"; - case cuda: __intrinsic_asm "atomicCAS($0._getPtrAt<uint32_t>($1), $2, $3)"; case hlsl: __intrinsic_asm ".InterlockedCompareStore"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_compare_exchange(__getMetalAtomicRef(buf[dest / 4]), compare_value, value); - return; - } - case spirv: + default: let buf = __getEquivalentStructuredBuffer<uint>(this); ::InterlockedCompareStore(buf[dest / 4], compare_value, value); } } - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedExchange( - UINT dest, - UINT value, - out UINT original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicExchange($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicExch($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedExchange"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_exchange(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedExchange(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedMax( - UINT dest, - UINT value, - out UINT original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicMax($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicMax($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedMax"; - case metal: - { - let buf = 
__getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedMax(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedMax( - UINT dest, - UINT value) - { - __target_switch - { - case glsl: __intrinsic_asm "atomicMax($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicMax($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedMax"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_max(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedMax(buf[dest / 4], value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedMin( - UINT dest, - UINT value, - out UINT original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicMin($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicMin($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedMin"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedMin(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedMin( - UINT dest, - UINT value) - { - __target_switch - { - case glsl: __intrinsic_asm "atomicMin($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicMin($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedMin"; - case metal: - { - let buf = 
__getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_min(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedMin(buf[dest / 4], value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedOr( - UINT dest, - UINT value, - out UINT original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicOr($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicOr($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedOr"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedOr(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedOr( - UINT dest, - UINT value) - { - __target_switch - { - case glsl: __intrinsic_asm "atomicOr($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicOr($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedOr"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_or(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedOr(buf[dest / 4], value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedXor( - UINT dest, - UINT value, - out UINT original_value) - { - __target_switch - { - case glsl: __intrinsic_asm "($3 = atomicXor($0._data[$1/4], $2))"; - case cuda: __intrinsic_asm "(*$3 = atomicXor($0._getPtrAt<uint32_t>($1), $2))"; - case hlsl: __intrinsic_asm ".InterlockedXor"; - case metal: - { - let buf = 
__getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value, original_value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedXor(buf[dest / 4], value, original_value); - } - } - - [ForceInline] - [require(cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] - void InterlockedXor( - UINT dest, - UINT value) - { - __target_switch - { - case glsl: __intrinsic_asm "atomicXor($0._data[$1/4], $2)"; - case cuda: __intrinsic_asm "atomicXor($0._getPtrAt<uint32_t>($1), $2)"; - case hlsl: __intrinsic_asm ".InterlockedXor"; - case metal: - { - let buf = __getEquivalentStructuredBuffer<uint>(this); - __metalInterlocked_xor(__getMetalAtomicRef(buf[dest / 4]), value); - return; - } - case spirv: - let buf = __getEquivalentStructuredBuffer<uint>(this); - ::InterlockedXor(buf[dest / 4], value); - } - } [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv, byteaddressbuffer_rw)] @@ -9699,26 +8792,6 @@ void GroupMemoryBarrierWithGroupSync() // Atomics -__generic<T> -__intrinsic_op($(kIROp_MetalAtomicCast)) -[require(metal)] -T* __getMetalAtomicRef(__ref T x); - -// Checks if input is a ImageSubscript -__generic<T> -__intrinsic_op($(kIROp_IsTextureAccess)) -bool __isTextureAccess(__ref T x); - -// Checks if input is a texture of T type scalar -__generic<T> -__intrinsic_op($(kIROp_IsTextureScalarAccess)) -bool __isTextureScalarAccess(__ref T x); - -// Checks if input is a texture array -__generic<T> -__intrinsic_op($(kIROp_IsTextureArrayAccess)) -bool __isTextureArrayAccess(__ref T x); - // Accepts an ImageSubscript // Gets Texture used with ImageSubscript. 
__generic<TextureAccess> @@ -9738,414 +8811,6 @@ __intrinsic_op($(kIROp_ExtractArrayCoordFromTextureAccess)) uint __extractArrayCoordFromTextureAccess(__ref TextureAccess x); ${{{{ -for (bool isArray : {false, true}) -{ - StringBuilder coordBuilder; - StringBuilder coordFetchBuilder; - - StringBuilder threeParamsASMBuilder; - StringBuilder threeParamsOutputParamASMBuilder; - - StringBuilder fourParamsASMBuilder; - - coordBuilder << "Coord coord"; - coordFetchBuilder << "coord"; - - threeParamsASMBuilder << "$1, $2"; - - fourParamsASMBuilder << "$1, $2, $3"; - if(isArray) - { - coordBuilder << ", uint arrayCoord"; - coordFetchBuilder << ", arrayCoord"; - threeParamsASMBuilder << ", $3"; - fourParamsASMBuilder << ", $4"; - threeParamsOutputParamASMBuilder << "$4"; - } - else - { - threeParamsOutputParamASMBuilder << "$3"; - } - auto coordString = coordBuilder.toString(); - auto coordFetchString = coordFetchBuilder.toString(); - - auto threeParamsASMString = threeParamsASMBuilder.toString(); - auto threeParamsOutputParamASMString = threeParamsOutputParamASMBuilder.toString(); - - auto fourParamsASMString = fourParamsASMBuilder.toString(); -}}}} - -${{{{ - for (const char* atomicOperation : {"add", "and", "max", "min", "or", "sub", "xor"}) - { -}}}} - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - vector<T, 4> __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value) - { - static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - "__metalImageInterlocked implementation only allows 'uint' coordinates"); - __intrinsic_asm "$0.atomic_fetch_$(atomicOperation)($(threeParamsASMString))"; - } - - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - void __metalImageInterlocked_$(atomicOperation)(TextureType tex, $(coordString), vector<T, 4> value, 
out T original_value) - { - static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - "__metalImageInterlocked implementation only allows 'uint' coordinates"); - original_value = __metalImageInterlocked_$(atomicOperation)(tex, $(coordFetchString), value)[0]; - } -${{{{ - } // atomicOperation -}}}} - - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - vector<T, 4> __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value) - { - static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - "__metalImageInterlocked implementation only allows 'uint' coordinates"); - __intrinsic_asm "($0.atomic_exchange($(threeParamsASMString)))"; - } - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - void __metalImageInterlocked_exchange(TextureType tex, $(coordString), vector<T, 4> value, out T original_value) - { - static_assert(T is int || T is uint, "Metal atomic texture operations only allow 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - "__metalImageInterlocked implementation only allows 'uint' coordinates"); - original_value = __metalImageInterlocked_exchange(tex, $(coordFetchString), value)[0]; - } - - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), __ref vector<T, 4> compare_value, vector<T, 4> value) - { - static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - 
"__metalImageInterlocked implementation only allows 'uint' coordinates"); - __intrinsic_asm "($0.atomic_compare_exchange_weak($(fourParamsASMString)))"; - } - __generic<TextureType, T, Coord> - [ForceInline] - [require(metal)] - void __metalImageInterlocked_compare_exchange(TextureType tex, $(coordString), vector<T, 4> compare_value, vector<T, 4> value, out T original_value) - { - static_assert(T is int || T is uint, "__metalImageInterlocked only allows 'int'/'uint' textures"); - static_assert(Coord is uint || Coord is vector<uint,2> || Coord is vector<uint,3> || Coord is vector<uint,4>, - "__metalImageInterlocked implementation only allows 'uint' coordinates"); - __metalImageInterlocked_compare_exchange(tex, $(coordFetchString), compare_value, value); - original_value = compare_value[0]; - } - -${{{{ -} // isArray -}}}} - -${{{{ - -// Generated functions: - -// atomicAdd, InterlockedAdd, atomic_fetch_add_explicit, OpAtomicIAdd, OpAtomicFAddEXT -// __cudaInterlocked_add, __glslInterlocked_add, __hlslInterlocked_add, __metalInterlocked_add, __spirvInterlocked_add - -// atomicAnd, InterlockedAnd, atomic_fetch_and_explicit, OpAtomicAnd -// __cudaInterlocked_and, __glslInterlocked_and, __hlslInterlocked_and, __metalInterlocked_and, __spirvInterlocked_and - -// atomicMax, InterlockedMax, atomic_fetch_max_explicit, OpAtomicUMax, OpAtomicSMax, OpAtomicFMaxEXT -// __cudaInterlocked_max, __glslInterlocked_max, __hlslInterlocked_max, __metalInterlocked_max, __spirvInterlocked_max - -// atomicMin, InterlockedMin, atomic_fetch_min_explicit, OpAtomicUMin, OpAtomicSMin, OpAtomicFMinEXT -// __cudaInterlocked_min, __glslInterlocked_min, __hlslInterlocked_min, __metalInterlocked_min, __spirvInterlocked_min - -// atomicOr, InterlockedOr, atomic_fetch_or_explicit, OpAtomicOr -// __cudaInterlocked_or, __glslInterlocked_or, __hlslInterlocked_or, __metalInterlocked_or, __spirvInterlocked_or - -// atomicXor, InterlockedXor, atomic_fetch_xor_explicit, OpAtomicXor -// 
__cudaInterlocked_xor, __glslInterlocked_xor, __hlslInterlocked_xor, __metalInterlocked_xor, __spirvInterlocked_xor - -// atomicExchange, atomicExch, InterlockedExchange, atomic_exchange_explicit, OpAtomicExchange -// __cudaInterlocked_exchange, __glslInterlocked_exchange, __hlslInterlocked_exchange, __metalInterlocked_exchange, __spirvInterlocked_exchange - -struct InternalAtomicOperationInfo -{ - const char* slangSuffix; - const char* cudaSuffix; - const char* glslSuffix; - const char* hlslSuffix; - const char* metalSuffix; - const char* spirvFloatSuffix; - const char* spirvUIntSuffix; - const char* spirvIntSuffix; - - const char* assertExpr; -}; - -InternalAtomicOperationInfo internalAtomicOperationInfo[7] = { - { "add", "Add", "Add", "Add", "fetch_add", "FAddEXT", "IAdd", "IAdd", "true" }, - { "and", "And", "And", "And", "fetch_and", "And", "And", "And", "!__isFloat<T>()" }, - { "max", "Max", "Max", "Max", "fetch_max", "FMaxEXT", "UMax", "SMax", "true" }, - { "min", "Min", "Min", "Min", "fetch_min", "FMinEXT", "UMin", "SMin", "true" }, - { "or", "Or", "Or", "Or", "fetch_or", "Or", "Or", "Or", "!__isFloat<T>()" }, - { "xor", "Xor", "Xor", "Xor", "fetch_xor", "Xor", "Xor", "Xor", "!__isFloat<T>()" }, - { "exchange", "Exch", "Exchange", "Exchange", "exchange", "Exchange", "Exchange", "Exchange", "true" }, -}; - -for (InternalAtomicOperationInfo atomicOp : internalAtomicOperationInfo) -{ -}}}} - __generic<AtomicType, T> - [ForceInline] - [require(metal)] - void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)"; - } - - __generic<AtomicType, T> - [ForceInline] - [require(metal)] - void __metalInterlocked_$(atomicOp.slangSuffix)(AtomicType dest, T value, out T original_value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with 
Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "((*($2)) = (atomic_$(atomicOp.metalSuffix)_explicit($0, $1, memory_order_relaxed)))"; - } - - __generic<T> - [ForceInline] - [require(cuda)] - void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "atomic$(atomicOp.cudaSuffix)((int*)$0, $1)"; - } - - __generic<T> - [ForceInline] - [require(cuda)] - void __cudaInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "(*$2 = atomic$(atomicOp.cudaSuffix)((int*)$0, $1))"; - } - - __generic<T> - [ForceInline] - [require(glsl)] - void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "$atomic$(atomicOp.glslSuffix)($A, $1)"; - } - - __generic<T> - [ForceInline] - [require(glsl)] - void __glslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "($2 = $atomic$(atomicOp.glslSuffix)($A, $1))"; - } - - __generic<T> - [ForceInline] - [require(hlsl)] - void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)"; - } - - __generic<T> - [ForceInline] - [require(hlsl)] - void __hlslInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - __intrinsic_asm "Interlocked$(atomicOp.hlslSuffix)"; - } - - __generic<T> - [ForceInline] - 
[require(spirv)] - void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - if (__isFloat<T>()) - { - spirv_asm - { - result:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value - }; - } - else if (__isUnsignedInt<T>()) - { - spirv_asm - { - result:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value - }; - } - else if (__isInt<T>()) - { - spirv_asm - { - result:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value - }; - } - } - - __generic<T> - [ForceInline] - [require(spirv)] - void __spirvInterlocked_$(atomicOp.slangSuffix)(__ref T dest, T value, out T original_value) - { - static_assert($(atomicOp.assertExpr), "Unable to use float with Atomic$(atomicOp.slangSuffix)"); - if (__isFloat<T>()) - { - spirv_asm - { - %original:$$T = OpAtomic$(atomicOp.spirvFloatSuffix) &dest Device None $value; - OpStore &original_value %original - }; - } - else if (__isUnsignedInt<T>()) - { - spirv_asm - { - %original:$$T = OpAtomic$(atomicOp.spirvUIntSuffix) &dest Device None $value; - OpStore &original_value %original - }; - } - else if (__isInt<T>()) - { - spirv_asm - { - %original:$$T = OpAtomic$(atomicOp.spirvIntSuffix) &dest Device None $value; - OpStore &original_value %original - }; - } - } - -${{{{ -} // fetchAndModify -}}}} - -__generic<AtomicType, T> -[ForceInline] -[require(metal)] -void __metalInterlocked_compare_exchange(AtomicType dest, __ref T compare_value, T value) -{ - __intrinsic_asm "atomic_compare_exchange_weak_explicit($0, $1, $2, memory_order_relaxed, memory_order_relaxed)"; -} - -__generic<AtomicType, T> -[ForceInline] -[require(metal)] -void __metalInterlocked_compare_exchange(AtomicType dest, T compare_value, T value, out T original_value) -{ - __metalInterlocked_compare_exchange(dest, compare_value, value); - original_value = compare_value; -} - -__generic<T> -__glsl_version(430) -[ForceInline] 
-[require(cuda)] -void __cudaInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) -{ - __intrinsic_asm "atomicCAS($0, $1, $2)"; -} - -__generic<T> -[ForceInline] -[require(cuda)] -void __cudaInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) -{ - __intrinsic_asm "*$3 = atomicCAS($0, $1, $2)"; -} - -__generic<T> -[ForceInline] -[require(glsl)] -void __glslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) -{ - __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; -} - -__generic<T> -[ForceInline] -[require(glsl)] -void __glslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) -{ - __intrinsic_asm "($3 = $atomicCompSwap($A, $1, $2))"; -} - -__generic<T> -[ForceInline] -[require(hlsl)] -void __hlslInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) -{ - __intrinsic_asm "InterlockedCompareExchange"; -} - -__generic<T> -[ForceInline] -[require(hlsl)] -void __hlslInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) -{ - __intrinsic_asm "InterlockedCompareExchange"; -} - -__generic<T> -[ForceInline] -[require(spirv)] -void __spirvInterlocked_compare_exchange(__ref T dest, __ref T compare_value, T value) -{ - spirv_asm - { - %result:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value; - }; -} - -__generic<T> -[ForceInline] -[require(spirv)] -void __spirvInterlocked_compare_exchange(__ref T dest, T compare_value, T value, out T original_value) -{ - spirv_asm - { - %original:$$T = OpAtomicCompareExchange &dest Device None None $value $compare_value; - OpStore &original_value %original - }; -} - -__generic<T> -[ForceInline] -[require(hlsl)] -void __hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value) -{ - __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; -} - -__generic<T> -[ForceInline] -[require(hlsl)] -void 
__hlslInterlocked_compare_exchange_float_bitwise(__ref T dest, T compare_value, T value, out T original_value) -{ - __intrinsic_asm "InterlockedCompareExchangeFloatBitwise"; -} - -${{{{ // Generates code for: // InterlockedAdd, InterlockedAnd, InterlockedOr, InterlockedXor, // InterlockedMax, InterlockedMin, InterlockedExchange @@ -10153,516 +8818,166 @@ struct SlangAtomicOperationInfo { const char* slangCallSuffix; const char* internalCallSuffix; + const char* interface; }; SlangAtomicOperationInfo slangAtomicOperationInfo[7] = { - { "Add", "add" }, - { "And", "and" }, - { "Or", "or" }, - { "Xor", "xor" }, - { "Max", "max" }, - { "Min", "min" }, - { "Exchange", "exchange" }, + { "Add", "add", "IArithmeticAtomicable" }, + { "And", "and", "IArithmeticAtomicable" }, + { "Or", "or", "IArithmeticAtomicable" }, + { "Xor", "xor", "IArithmeticAtomicable" }, + { "Max", "max", "IArithmeticAtomicable" }, + { "Min", "min", "IArithmeticAtomicable" }, + { "Exchange", "exchange", "IAtomicable" }, }; for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo) { - for(const char* T : {"int", "uint"}) - { }}}} +/// Perform an atomic $(atomicOp.internalCallSuffix) operation on `dest`. +/// @param T The type of the value to perform the atomic operation on. +/// @param dest The value to perform the atomic operation on. +/// @param value The operand to the atomic operation. +/// @param original_value The value of `dest` before the operation. +/// @remarks When targeting HLSL, it is invalid to call this function with `T` being a floating-point type, since +/// HLSL does not allow atomic operations on floating point types. For `InterlockedAdd`, consider using +/// `RWByteAddressBuffer.InterlockedAddF32` or `RWByteAddressBuffer.InterlockedAddF16` instead when NVAPI is available. +/// On SPIR-V (Vulkan), all integer and floating point types are supported. +/// On Metal and WGSL, all floating-point types are not supported. 
+/// @category atomic Atomic functions [ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value) +void Interlocked$(atomicOp.slangCallSuffix)<T:$(atomicOp.interface)>(__ref T dest, T value) { - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value); - case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value); - case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value); - case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value); - case metal: - if (__isTextureAccess(dest)) - { - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T), 4>(value)); - } - else - { - __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vector<$(T), 4>(value)); - } - } - else - { - __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value); - } - return; - } + __atomic_$(atomicOp.internalCallSuffix)(dest, value); } [ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void Interlocked$(atomicOp.slangCallSuffix)(__ref $(T) dest, $(T) value, out $(T) original_value) +void Interlocked$(atomicOp.slangCallSuffix)<T:$(atomicOp.interface)>(__ref T dest, T value, out T original_value) { - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to a scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); - 
case cuda: __cudaInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); - case glsl: __glslInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); - case spirv: __spirvInterlocked_$(atomicOp.internalCallSuffix)(dest, value, original_value); - case metal: - if (__isTextureAccess(dest)) - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value); - } - else - { - __metalImageInterlocked_$(atomicOp.internalCallSuffix)(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vector<$(T),4>(value), original_value); - } - else - __metalInterlocked_$(atomicOp.internalCallSuffix)(__getMetalAtomicRef(dest), value, original_value); - return; - } + original_value = __atomic_$(atomicOp.internalCallSuffix)(dest, value); } -${{{{ - } // for(const char* T : {"int64_t", "uint64_t"}) -}}}} - [ForceInline] +__glsl_version(430) +[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] void Interlocked$(atomicOp.slangCallSuffix)(__ref uint dest, int value) { - Interlocked$(atomicOp.slangCallSuffix)(dest, (uint)value); + __atomic_$(atomicOp.internalCallSuffix)(dest, (uint)value); } ${{{{ } // for (SlangAtomicOperationInfo atomicOp : slangAtomicOperationInfo) }}}} -${{{{ -for(const char* T : {"int64_t", "uint64_t"}) -{ -}}}} -/// @category atomic Atomic functions -[ForceInline] -[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_metal)] -void InterlockedAdd(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_add(dest, value); - case cuda: __cudaInterlocked_add(dest, value); - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __glslInterlocked_add(dest, value); - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - result:$$$(T) = OpAtomicIAdd &dest Device None 
$value; - }; - } -} - -[ForceInline] -void InterlockedAdd(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_add(dest, value, original_value); - case cuda: __cudaInterlocked_add(dest, value, original_value); - case glsl: - __requireGLSLExtension("GL_EXT_shader_atomic_int64"); - __glslInterlocked_add(dest, value, original_value); - case spirv: - spirv_asm - { - OpCapability Int64Atomics; - %origin:$$$(T) = OpAtomicIAdd &dest Device None $value; - OpStore &original_value %origin - }; - } -} - -/// @category atomic -[ForceInline] -void InterlockedAnd(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_and(dest, value); - } -} - -[ForceInline] -void InterlockedAnd(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_and(dest, value, original_value); - } -} - -/// @category atomic -[ForceInline] -void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value); - } -} - -[ForceInline] -void InterlockedCompareExchange(__ref $(T) dest, $(T) compare_value, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); - } -} - -[ForceInline] -void InterlockedCompareStore(__ref $(T) dest, $(T) compare_value, $(T) value); -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - } -} - -/// @category atomic -[ForceInline] -void InterlockedExchange(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } -} - -[ForceInline] -void InterlockedExchange(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedExchange"; - } -} - -/// @category atomic -[ForceInline] -void 
InterlockedMax(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedMax"; - } -} - -[ForceInline] -void InterlockedMax(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedMax"; - } -} - -/// @category atomic -[ForceInline] -void InterlockedMin(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedMin"; - } -} - -[ForceInline] -void InterlockedMin(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedMin"; - } -} - -/// @category atomic -[ForceInline] -void InterlockedOr(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedOr"; - } -} - -[ForceInline] -void InterlockedOr(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedOr"; - } -} - -/// @category atomic -[ForceInline] -void InterlockedXor(__ref $(T) dest, $(T) value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedXor"; - } -} - -[ForceInline] -void InterlockedXor(__ref $(T) dest, $(T) value, out $(T) original_value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedXor"; - } -} - -${{{{ -} // for(const char* T : {"int64_t", "uint64_t"}) -}}}} - +/// Perform an atomic compare and exchange operation on `dest`. +/// @param T The type of the value to perform the atomic operation on. +/// @param dest The value to perform the atomic operation on. +/// @param compare_value The value to compare `dest` with. +/// @param value The value to store into `dest` if the compare result is equal. +/// @param original_value The value of `dest` before the operation. 
+/// @remarks When targeting HLSL, a call to this function with `T` being `float` will translate to a call to +/// `InterlockedCompareExchangeFloatBitwise`, which means the comparison is done as a bitwise comparison. +/// +/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`. +/// +/// On Metal and WGSL, all floating-point types are not supported. +/// +/// On CUDA, this function maps to `atomicCAS`. /// @category atomic [ForceInline] -__glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void InterlockedCompareExchange(__ref int dest, int compare_value, int value, out int original_value) +void InterlockedCompareExchange<T:IAtomicable>(__ref T dest, T compare_value, T value, out T original_value) { - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); - case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value); - case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value); - case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value); - case metal: - if (__isTextureAccess(dest)) - { - vector<int, 4> vec_compare_value = vector<int, 4>(compare_value); - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value); - } - else - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value), original_value); - } - } - else - { - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - 
} - return; - } -} - -[ForceInline] -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void InterlockedCompareExchange(__ref uint dest, uint compare_value, uint value, out uint original_value) -{ - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange(dest, compare_value, value, original_value); - case cuda: __cudaInterlocked_compare_exchange(dest, compare_value, value, original_value); - case glsl: __glslInterlocked_compare_exchange(dest, compare_value, value, original_value); - case spirv: __spirvInterlocked_compare_exchange(dest, compare_value, value, original_value); - case metal: - if (__isTextureAccess(dest)) - { - vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value); - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value); - } - else - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value), original_value); - } - } - else - { - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - } - return; - } + original_value = __atomic_compare_exchange(dest, compare_value, value); } +/// Perform an atomic compare and exchange operation on `dest`. +/// @param T The type of the value to perform the atomic operation on. +/// @param dest The value to perform the atomic operation on. +/// @param compare_value The value to compare `dest` with. +/// @param value The value to store into `dest` if the compare result is equal. +/// @param original_value The value of `dest` before the operation. 
+/// @remarks When targeting HLSL, a call to this function will translate to a call to +/// `InterlockedCompareExchangeFloatBitwise`, which means the comparison is done as a bitwise comparison. +/// +/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`. +/// +/// On Metal and WGSL, this function is not available. +/// +/// On CUDA, this function maps to `atomicCAS`. /// @category atomic [ForceInline] void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value) { - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value); - case metal: - static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); - return; - } + __atomic_compare_exchange(dest, compare_value, value); } [ForceInline] void InterlockedCompareExchangeFloatBitwise(__ref float dest, float compare_value, float value, out float original_value) { - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_compare_exchange_float_bitwise(dest, compare_value, value, original_value); - case metal: - static_assert(!__isTextureAccess(dest), "float atomic texture operations are disallowed with Metal target's"); - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value, original_value); - return; - } + original_value = __atomic_compare_exchange(dest, compare_value, value); } +/// Perform an atomic compare and store operation on `dest`. +/// @param T The type of the value to perform the atomic operation on. +/// @param dest The value to perform the atomic operation on. 
+/// @param compare_value The value to compare `dest` with. +/// @param value The value to store into `dest` if the compare result is equal. +/// @remarks When targeting HLSL, a call to this function with `T` being `float` will translate to a call to +/// `InterlockedCompareStoreFloatBitwise`, which means the comparison is done as a bitwise comparison. +/// +/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`. +/// +/// On Metal and WGSL, this function is not available. +/// +/// On CUDA, this function maps to `atomicCAS`. /// @category atomic [ForceInline] __glsl_version(430) [require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void InterlockedCompareStore(__ref int dest, int compare_value, int value) +void InterlockedCompareStore<T:IAtomicable>(__ref T dest, T compare_value, T value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedCompareStore"; - case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; - case cuda: __intrinsic_asm "atomicCAS($0, $1, $2)"; - case spirv: - { - spirv_asm - { - result:$$int = OpAtomicCompareExchange &dest Device None None $value $compare_value; - }; - return; - } - case metal: - { - if (__isTextureAccess(dest)) - { - vector<int, 4> vec_compare_value = vector<int, 4>(compare_value); - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value)); - } - else - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vec_compare_value, vector<int, 4>(value)); - } - } - else - { - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); - } - return; - } - } -} - -[ForceInline] -__glsl_version(430) -[require(cuda_glsl_hlsl_metal_spirv, atomic_glsl_hlsl_cuda_metal)] -void InterlockedCompareStore(__ref uint dest, 
uint compare_value, uint value) -{ - __target_switch - { - case hlsl: __intrinsic_asm "InterlockedCompareStore"; - case glsl: __intrinsic_asm "$atomicCompSwap($A, $1, $2)"; - case cuda: __intrinsic_asm "atomicCAS((int*)$0, $1, $2)"; - case spirv: - spirv_asm - { - result:$$uint = OpAtomicCompareExchange &dest Device None None $value $compare_value; - }; - case metal: - if (__isTextureAccess(dest)) - { - vector<uint, 4> vec_compare_value = vector<uint, 4>(compare_value); - if(__isTextureArrayAccess(dest)) - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), __extractArrayCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value)); - } - else - { - __metalImageInterlocked_compare_exchange(__extractTextureFromTextureAccess(dest), - __extractCoordFromTextureAccess(dest), vec_compare_value, vector<uint, 4>(value)); - } - } - else - { - __metalInterlocked_compare_exchange(__getMetalAtomicRef(dest), compare_value, value); - } + default: + __atomic_compare_exchange(dest, compare_value, value); return; } } +/// Perform an atomic compare and store operation on `dest`. +/// @param T The type of the value to perform the atomic operation on. +/// @param dest The value to perform the atomic operation on. +/// @param compare_value The value to compare `dest` with. +/// @param value The value to store into `dest` if the compare result is equal. +/// @remarks When targeting HLSL, a call to this function will translate to a call to +/// `InterlockedCompareStoreFloatBitwise`, which means the comparison is done as a bitwise comparison. +/// +/// On SPIR-V (Vulkan), this function maps to `OpAtomicCompareExchange`. +/// +/// On Metal and WGSL, this function is not available. +/// +/// On CUDA, this function maps to `atomicCAS`. 
/// @category atomic [ForceInline] -void InterlockedCompareStoreFloatBitwise(__ref float dest, float compare_value, float value) +void InterlockedCompareStoreFloatBitwise<T:IAtomicable>(__ref T dest, T compare_value, T value) { __target_switch { case hlsl: __intrinsic_asm "InterlockedCompareStoreFloatBitwise"; - } -} - -/// @category atomic -[ForceInline] -void InterlockedExchange(__ref float dest, float value) -{ - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_exchange(dest, value); - case metal: - static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); - __metalInterlocked_exchange(__getMetalAtomicRef(dest), value); - return; - } -} - -[ForceInline] -void InterlockedExchange(__ref float dest, float value, out float original_value) -{ - static_assert(__isTextureScalarAccess(dest) || !__isTextureAccess(dest), "Atomic must be applied to scalar texture or non-texture"); - __target_switch - { - case hlsl: __hlslInterlocked_exchange(dest, value, original_value); - case metal: - static_assert(!__isTextureAccess(dest), "'float' atomic texture operations are disallowed with Metal target's"); - __metalInterlocked_exchange(__getMetalAtomicRef(dest), value, original_value); + default: + __atomic_compare_exchange(dest, compare_value, value); return; } } - /// Test if a floating-point value finite. 
/// @category math __generic<T : __BuiltinFloatingPointType> @@ -21245,13 +19560,13 @@ extension _Texture<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite), 0, { __target_switch { - case spirv: - originalValue = __atomicAdd(this[coord], value); + default: + originalValue = __atomic_add(this[coord], value); return; - case glsl: - __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)"; case hlsl: __intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)"; + case glsl: + __intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)"; } } diff --git a/source/slang/slang-diagnostic-defs.h b/source/slang/slang-diagnostic-defs.h index 298c79f7e..48b296ce3 100644 --- a/source/slang/slang-diagnostic-defs.h +++ b/source/slang/slang-diagnostic-defs.h @@ -883,6 +883,7 @@ DIAGNOSTIC(55200, Error, unsupportedBuiltinType, "'$0' is not a supported builti DIAGNOSTIC(55201, Error, unsupportedRecursion, "recursion detected in call to '$0', but the current code generation target does not allow recursion.") DIAGNOSTIC(55202, Error, systemValueAttributeNotSupported, "system value semantic '$0' is not supported for the current target.") DIAGNOSTIC(55203, Error, systemValueTypeIncompatible, "system value semantic '$0' should have type '$1' or be convertible to type '$1'.") +DIAGNOSTIC(55204, Error, unsupportedTargetIntrinsic, "intrinsic operation '$0' is not supported for the current target.") DIAGNOSTIC(56001, Error, unableToAutoMapCUDATypeToHostType, "Could not automatically map '$0' to a host type. 
Automatic binding generation failed for '$1'") DIAGNOSTIC(56002, Error, attemptToQuerySizeOfUnsizedArray, "cannot obtain the size of an unsized array.") diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index b113f726e..79a9b1a56 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -2472,6 +2472,16 @@ void CLikeSourceEmitter::defaultEmitInstExpr(IRInst* inst, const EmitOpInfo& inO } break; + case kIROp_GetEquivalentStructuredBuffer: + { + auto base = inst->getOperand(0); + emitOperand(base, outerPrec); + m_writer->emit(".asStructuredBuffer<"); + emitType(as<IRHLSLStructuredBufferTypeBase>(inst->getDataType())->getElementType()); + m_writer->emit(">()"); + } + break; + case kIROp_RWStructuredBufferStore: { auto base = inst->getOperand(0); diff --git a/source/slang/slang-emit-c-like.h b/source/slang/slang-emit-c-like.h index 3cccad9e6..f0d703b40 100644 --- a/source/slang/slang-emit-c-like.h +++ b/source/slang/slang-emit-c-like.h @@ -260,7 +260,6 @@ public: bool hasExplicitConstantBufferOffset(IRInst* cbufferType); bool isSingleElementConstantBuffer(IRInst* cbufferType); bool shouldForceUnpackConstantBufferElements(IRInst* cbufferType); - // // Expressions // diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 81bcafeb3..7d104ff1b 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -515,7 +515,17 @@ bool CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitInstResultDecl(inst); m_writer->emit("atomicAdd("); + bool needCloseTypeCast = false; + if (inst->getDataType()->getOp() == kIROp_Int64Type) + { + m_writer->emit("(unsigned long long*)("); + needCloseTypeCast = true; + } emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (needCloseTypeCast) + { + m_writer->emit(")"); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -525,7 +535,17 @@ bool 
CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitInstResultDecl(inst); m_writer->emit("atomicAdd("); + bool needCloseTypeCast = false; + if (inst->getDataType()->getOp() == kIROp_Int64Type) + { + m_writer->emit("(unsigned long long*)("); + needCloseTypeCast = true; + } emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (needCloseTypeCast) + { + m_writer->emit(")"); + } m_writer->emit(", -("); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit("));\n"); diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index ca5569602..7f8bc14b4 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -2153,8 +2153,50 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu return false; } +static IRImageSubscript* isTextureAccess(IRInst* inst) +{ + return as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); +} + +void GLSLSourceEmitter::emitAtomicImageCoord(IRImageSubscript* inst) +{ + emitOperand(inst->getImage(), getInfo(EmitOp::General)); + m_writer->emit(", "); + if (auto vecType = as<IRVectorType>(inst->getCoord()->getDataType())) + { + m_writer->emit("ivec"); + m_writer->emit(getIntVal(vecType->getElementCount())); + } + else + { + m_writer->emit("int"); + } + m_writer->emit("("); + emitOperand(inst->getCoord(), getInfo(EmitOp::General)); + m_writer->emit(")"); + if (inst->hasSampleCoord()) + { + m_writer->emit(", "); + emitOperand(inst->getSampleCoord(), getInfo(EmitOp::General)); + } +} + bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { + auto requireAtomicExtIfNeeded = [&]() + { + if (isFloatingType(inst->getDataType())) + { + _requireGLSLExtension(toSlice("GL_EXT_shader_atomic_float")); + } + if (isIntegralType(inst->getDataType())) + { + if (getIntTypeInfo(inst->getDataType()).width == 64) + { + _requireGLSLExtension(toSlice("GL_EXT_shader_atomic_int64")); + } + } + }; switch (inst->getOp()) { case 
kIROp_StructuredBufferGetDimensions: @@ -2176,24 +2218,52 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) case kIROp_AtomicLoad: { emitInstResultDecl(inst); - emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageLoad("); + emitAtomicImageCoord(imageSubscript); + m_writer->emit(")"); + } + else + { + emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(";\n"); return true; } case kIROp_AtomicStore: { - emitInstResultDecl(inst); - emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(" = "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(";\n"); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageStore("); + emitAtomicImageCoord(imageSubscript); + m_writer->emit(", "); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(")"); + } + else + { + emitDereferenceOperand(inst->getOperand(0), getInfo(EmitOp::General)); + m_writer->emit(" = "); + emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + m_writer->emit(";\n"); + } return true; } case kIROp_AtomicExchange: { + requireAtomicExtIfNeeded(); emitInstResultDecl(inst); - m_writer->emit("atomicExchange("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicExchange("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicExchange("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2201,9 +2271,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicCompareExchange: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicCompSwap("); - 
emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicCompSwap("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicCompSwap("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(", "); @@ -2213,9 +2293,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicAdd: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicAdd("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicAdd("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicAdd("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2223,9 +2313,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicSub: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicAdd("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicAdd("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicAdd("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", -("); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit("));\n"); @@ -2233,9 +2333,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicAnd: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicAnd("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicAnd("); + 
emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicAnd("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2243,9 +2353,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicOr: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicOr("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicOr("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicOr("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2253,9 +2373,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicXor: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicXor("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicXor("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicXor("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2263,9 +2393,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicMin: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicMin("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicMin("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicMin("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); 
emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2273,9 +2413,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicMax: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicMax("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicMax("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicMax("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(");\n"); @@ -2283,9 +2433,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicInc: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicAdd("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicAdd("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicAdd("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitType(inst->getDataType()); m_writer->emit("(1)"); @@ -2294,9 +2454,19 @@ bool GLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicDec: { + requireAtomicExtIfNeeded(); + emitInstResultDecl(inst); - m_writer->emit("atomicAdd("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (auto imageSubscript = isTextureAccess(inst)) + { + m_writer->emit("imageAtomicAdd("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomicAdd("); + emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + } m_writer->emit(", "); emitType(inst->getDataType()); m_writer->emit("(-1)"); diff --git a/source/slang/slang-emit-glsl.h b/source/slang/slang-emit-glsl.h index 8958c7608..12ab60e46 100644 --- 
a/source/slang/slang-emit-glsl.h +++ b/source/slang/slang-emit-glsl.h @@ -133,6 +133,8 @@ protected: void _emitSpecialFloatImpl(IRType* type, const char* valueExpr); + void emitAtomicImageCoord(IRImageSubscript* operand); + Dictionary<IRInst*, HashSet<IRFunc*>> m_referencingEntryPoints; RefPtr<GLSLExtensionTracker> m_glslExtensionTracker; diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index b45b4c575..ae87fd6d5 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -498,6 +498,10 @@ void HLSLSourceEmitter::emitEntryPointAttributesImpl(IRFunc* irFunc, IREntryPoin bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { + auto diagnoseFloatAtommic = [&]() + { + getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "floating point atomic operation"); + }; switch (inst->getOp()) { case kIROp_AtomicLoad: @@ -519,7 +523,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedExchange("); + m_writer->emit("InterlockedExchange"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -532,7 +537,10 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedCompareExchange("); + m_writer->emit("InterlockedCompareExchange"); + if (inst->getDataType()->getOp() == kIROp_FloatType) + m_writer->emit("FloatBitwise"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -547,7 +555,12 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedAdd("); + if 
(inst->getDataType()->getOp() == kIROp_FloatType) + { + diagnoseFloatAtommic(); + } + m_writer->emit("InterlockedAdd"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -560,7 +573,12 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedAdd("); + if (inst->getDataType()->getOp() == kIROp_FloatType) + { + diagnoseFloatAtommic(); + } + m_writer->emit("InterlockedAdd"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", -("); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -573,7 +591,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedAnd("); + m_writer->emit("InterlockedAnd"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -586,7 +605,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedOr("); + m_writer->emit("InterlockedOr"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -599,7 +619,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedXor("); + m_writer->emit("InterlockedXor"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -612,7 +633,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* 
inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedMin("); + m_writer->emit("InterlockedMin"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -625,7 +647,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedMax("); + m_writer->emit("InterlockedMax"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); @@ -638,7 +661,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedAdd("); + m_writer->emit("InterlockedAdd"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", 1, "); m_writer->emit(getName(inst)); @@ -649,7 +673,8 @@ bool HLSLSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n"); - m_writer->emit("InterlockedAdd("); + m_writer->emit("InterlockedAdd"); + m_writer->emit("("); emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); m_writer->emit(", -1, "); m_writer->emit(getName(inst)); diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 2d5a7d56b..abd4d670a 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -260,8 +260,118 @@ void MetalSourceEmitter::emitMemoryOrderOperand(IRInst* inst) } } +static IRImageSubscript* isTextureAccess(IRInst* inst) +{ + return as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); +} + +void MetalSourceEmitter::emitAtomicImageCoord(IRImageSubscript* inst) +{ + auto resourceType = as<IRResourceTypeBase>(inst->getImage()->getDataType()); + if 
(auto textureType = as<IRTextureType>(resourceType)) + { + if (as<IRVectorType>(textureType->getElementType())) + { + getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "atomic operation on non-scalar texture"); + } + } + bool isArray = getIntVal(resourceType->getIsArrayInst()) != 0; + if (isArray) + { + emitOperand(inst->getCoord(), getInfo(EmitOp::Postfix)); + if (auto coordType = as<IRVectorType>(inst->getCoord()->getDataType())) + { + m_writer->emit("."); + const char* elements[] = { "x", "y", "z", "w" }; + for (IRIntegerValue i = 0; i < getIntVal(coordType->getElementCount()) - 1; i++) + m_writer->emit(elements[Math::Min(3, (int)i)]); + m_writer->emit(", "); + emitOperand(inst->getCoord(), getInfo(EmitOp::Postfix)); + m_writer->emit("."); + m_writer->emit(elements[Math::Min(3, (int)getIntVal(coordType->getElementCount()) - 1)]); + } + else + { + getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "invalid image coordinate for atomic operation"); + } + } + else + { + emitOperand(inst->getCoord(), getInfo(EmitOp::General)); + } +} + +void MetalSourceEmitter::emitAtomicDestOperand(IRInst* inst) +{ + // If operand is already an atomic type, we can emit it + // as is. + auto ptrType = as<IRPtrTypeBase>(inst->getDataType()); + if (ptrType && as<IRAtomicType>(ptrType->getValueType())) + { + emitOperand(inst, getInfo(EmitOp::General)); + return; + } + // Otherwise, we need to emit a cast. + m_writer->emit("((atomic_"); + emitType(inst->getDataType()); + m_writer->emit(")("); + emitOperand(inst, getInfo(EmitOp::General)); + m_writer->emit("))"); +} + +void MetalSourceEmitter::emitAtomicSrcOperand(bool isImage, IRInst* inst) +{ + if (!isImage) + { + emitOperand(inst, getInfo(EmitOp::General)); + return; + } + // If we are emitting a source operand for an atomic image operation, + // we need to convert it into a 4-vector. 
+ m_writer->emit("vec<"); + emitType(inst->getDataType()); + m_writer->emit(", 4>("); + emitOperand(inst, getInfo(EmitOp::General)); + m_writer->emit(")"); +} + bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) { + auto emitAtomicOp = [&](const char* imageFunc, const char* bufferFunc) + { + emitInstResultDecl(inst); + bool isImageOp = false; + if (auto imageSubscript = isTextureAccess(inst)) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit("."); + m_writer->emit(imageFunc); + m_writer->emit("("); + emitAtomicImageCoord(imageSubscript); + isImageOp = true; + } + else + { + m_writer->emit(bufferFunc); + m_writer->emit("("); + emitAtomicDestOperand(inst->getOperand(0)); + } + m_writer->emit(", "); + emitAtomicSrcOperand(isImageOp, inst->getOperand(1)); + if (!isImageOp) + { + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(inst->getOperandCount() - 1)); + } + if (isImageOp) + m_writer->emit(").x;\n"); + else + m_writer->emit(");\n"); + }; + auto diagnoseFloatAtommic = [&]() + { + getSink()->diagnose(inst, Diagnostics::unsupportedTargetIntrinsic, "floating point atomic operation"); + }; switch (inst->getOp()) { case kIROp_discard: @@ -287,160 +397,216 @@ bool MetalSourceEmitter::tryEmitInstStmtImpl(IRInst* inst) } case kIROp_AtomicLoad: { + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + emitInstResultDecl(inst); - m_writer->emit("atomic_load_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(1)); - m_writer->emit(");\n"); + bool isImageOp = false; + if (auto imageSubscript = isTextureAccess(inst)) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit(".atomic_load("); + emitAtomicImageCoord(imageSubscript); + isImageOp = true; + } + else + { + m_writer->emit("atomic_load_explicit("); + emitAtomicDestOperand(inst->getOperand(0)); + } + if (!isImageOp) + { + 
m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(1)); + } + if (isImageOp) + m_writer->emit(").x;\n"); + else + m_writer->emit(");\n"); return true; } case kIROp_AtomicStore: { - m_writer->emit("atomic_store_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); + bool isImageOp = false; + if (auto imageSubscript = isTextureAccess(inst)) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit(".atomic_store("); + emitAtomicImageCoord(imageSubscript); + isImageOp = true; + } + else + { + m_writer->emit("atomic_store_explicit("); + emitAtomicDestOperand(inst->getOperand(0)); + } m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); + emitAtomicSrcOperand(isImageOp, inst->getOperand(1)); + if (!isImageOp) + { + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(2)); + } m_writer->emit(");\n"); return true; } case kIROp_AtomicExchange: { - emitInstResultDecl(inst); - m_writer->emit("atomic_exchange_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + emitAtomicOp("atomic_exchange", "atomic_exchange_explicit"); return true; } case kIROp_AtomicCompareExchange: { + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + bool isImageOp = false; + auto imageSubscript = isTextureAccess(inst); + isImageOp = (imageSubscript != nullptr); + emitType(inst->getDataType(), getName(inst)); m_writer->emit(";\n{\n"); - emitType(inst->getDataType(), "_metal_cas_comparand"); + if (isImageOp) + m_writer->emit("vec<"); + emitType(inst->getDataType()); + if (isImageOp) + m_writer->emit(", 4>"); + m_writer->emit(" 
_metal_cas_comparand"); m_writer->emit(" = "); emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); m_writer->emit(";\n"); - - m_writer->emit(getName(inst)); - m_writer->emit(" = atomic_compare_exchange_weak_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); + if (imageSubscript) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit(".atomic_compare_exchange_weak("); + emitAtomicImageCoord(imageSubscript); + } + else + { + m_writer->emit("atomic_compare_exchange_weak_explicit("); + emitAtomicDestOperand(inst->getOperand(0)); + } m_writer->emit(", &_metal_cas_comparand, "); - emitOperand(inst->getOperand(2), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(3)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(4)); - m_writer->emit(");\n}\n"); + emitAtomicSrcOperand(isImageOp, inst->getOperand(2)); + if (!isImageOp) + { + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(3)); + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(4)); + } + m_writer->emit(");\n"); + m_writer->emit(getName(inst)); + m_writer->emit(" = _metal_cas_comparand"); + if (isImageOp) + m_writer->emit(".x"); + m_writer->emit(";\n}\n"); return true; } case kIROp_AtomicAdd: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_add_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + emitAtomicOp("atomic_fetch_add", "atomic_fetch_add_explicit"); return true; } case kIROp_AtomicSub: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_sub_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - 
emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + emitAtomicOp("atomic_fetch_sub", "atomic_fetch_sub_explicit"); return true; } case kIROp_AtomicAnd: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_and_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + emitAtomicOp("atomic_fetch_and", "atomic_fetch_and_explicit"); return true; } case kIROp_AtomicOr: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_or_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + emitAtomicOp("atomic_fetch_or", "atomic_fetch_or_explicit"); return true; } case kIROp_AtomicXor: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_xor_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + emitAtomicOp("atomic_fetch_xor", "atomic_fetch_xor_explicit"); return true; } case kIROp_AtomicMin: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_min_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + 
emitAtomicOp("atomic_fetch_min", "atomic_fetch_min_explicit"); return true; } case kIROp_AtomicMax: { - emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_max_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitOperand(inst->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(", "); - emitMemoryOrderOperand(inst->getOperand(2)); - m_writer->emit(");\n"); + if (isFloatingType(inst->getDataType())) + diagnoseFloatAtommic(); + + emitAtomicOp("atomic_fetch_max", "atomic_fetch_max_explicit"); return true; } case kIROp_AtomicInc: { emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_add_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", 1, "); - emitMemoryOrderOperand(inst->getOperand(1)); - m_writer->emit(");\n"); + bool isImageOp = false; + if (auto imageSubscript = isTextureAccess(inst)) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit(".atomic_fetch_add("); + emitAtomicImageCoord(imageSubscript); + isImageOp = true; + } + else + { + m_writer->emit("atomic_fetch_add_explicit("); + emitAtomicDestOperand(inst->getOperand(0)); + } + m_writer->emit(", 1"); + if (!isImageOp) + { + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(1)); + } + if (isImageOp) + m_writer->emit(").x;\n"); + else + m_writer->emit(");\n"); return true; } case kIROp_AtomicDec: { emitInstResultDecl(inst); - m_writer->emit("atomic_fetch_sub_explicit("); - emitOperand(inst->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit(", 1, "); - emitMemoryOrderOperand(inst->getOperand(1)); - m_writer->emit(");\n"); + bool isImageOp = false; + if (auto imageSubscript = isTextureAccess(inst)) + { + emitOperand(imageSubscript->getImage(), getInfo(EmitOp::Postfix)); + m_writer->emit(".atomic_fetch_sub("); + emitAtomicImageCoord(imageSubscript); + isImageOp = true; + } + else + { + m_writer->emit("atomic_fetch_sub_explicit("); + 
emitAtomicDestOperand(inst->getOperand(0)); + } + m_writer->emit(", 1"); + if (!isImageOp) + { + m_writer->emit(", "); + emitMemoryOrderOperand(inst->getOperand(1)); + } + if (isImageOp) + m_writer->emit(").x;\n"); + else + m_writer->emit(");\n"); return true; } } diff --git a/source/slang/slang-emit-metal.h b/source/slang/slang-emit-metal.h index 8e33eddef..e0fe1f1c8 100644 --- a/source/slang/slang-emit-metal.h +++ b/source/slang/slang-emit-metal.h @@ -79,6 +79,11 @@ protected: void _emitStageAccessSemantic(IRStageAccessDecoration* decoration, const char* name); bool _emitUserSemantic(UnownedStringSlice semanticName, IRIntegerValue semanticIndex); bool maybeEmitSystemSemantic(IRInst* inst); + + void emitAtomicImageCoord(IRImageSubscript* subscript); + void emitAtomicDestOperand(IRInst* operand); + void emitAtomicSrcOperand(bool isImage, IRInst* operand); + void emitAtomicSemanticOperand(IRInst* inst); }; } diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 0f123b8fd..62819e6d5 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2929,11 +2929,11 @@ struct SPIRVEmitContext void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { + auto typeOp = atomicInst->getDataType()->getOp(); switch (op) { case SpvOpAtomicFAddEXT: { - auto typeOp = getVectorElementType(atomicInst->getDataType())->getOp(); switch (typeOp) { case kIROp_FloatType: @@ -2948,13 +2948,19 @@ struct SPIRVEmitContext ensureExtensionDeclaration(toSlice("SPV_EXT_shader_atomic_float16_add")); requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; + case kIROp_VectorType: + if (as<IRVectorType>(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType) + { + ensureExtensionDeclaration(toSlice("VK_NV_shader_atomic_float16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + } + break; } } break; case SpvOpAtomicFMinEXT: case SpvOpAtomicFMaxEXT: { - auto typeOp = 
getVectorElementType(atomicInst->getDataType())->getOp(); switch (typeOp) { case kIROp_FloatType: @@ -2969,10 +2975,24 @@ struct SPIRVEmitContext ensureExtensionDeclaration(toSlice("SPV_EXT_shader_atomic_float_min_max")); requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; + case kIROp_VectorType: + if (as<IRVectorType>(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType) + { + ensureExtensionDeclaration(toSlice("VK_NV_shader_atomic_float16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + } + break; } } break; } + switch (typeOp) + { + case kIROp_UInt64Type: + case kIROp_Int64Type: + requireSPIRVCapability(SpvCapabilityInt64Atomics); + break; + } } // The instructions that appear inside the basic blocks of @@ -3321,6 +3341,7 @@ struct SPIRVEmitContext const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1)); result = emitOpAtomicIIncrement(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics); + ensureAtomicCapability(inst, SpvOpAtomicIIncrement); } break; case kIROp_AtomicDec: @@ -3329,6 +3350,7 @@ struct SPIRVEmitContext const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1)); result = emitOpAtomicIDecrement(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics); + ensureAtomicCapability(inst, SpvOpAtomicIDecrement); } break; case kIROp_AtomicLoad: @@ -3337,6 +3359,7 @@ struct SPIRVEmitContext const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1)); result = emitOpAtomicLoad(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics); + ensureAtomicCapability(inst, 
SpvOpAtomicLoad); } break; case kIROp_AtomicStore: @@ -3345,6 +3368,7 @@ struct SPIRVEmitContext const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2)); result = emitOpAtomicStore(parent, inst, inst->getOperand(0), memoryScope, memorySemantics, inst->getOperand(1)); + ensureAtomicCapability(inst, SpvOpAtomicStore); } break; case kIROp_AtomicExchange: @@ -3353,6 +3377,7 @@ struct SPIRVEmitContext const auto memoryScope = emitIntConstant(IRIntegerValue{ SpvScopeDevice }, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2)); result = emitOpAtomicExchange(parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemantics, inst->getOperand(1)); + ensureAtomicCapability(inst, SpvOpAtomicExchange); } break; case kIROp_AtomicCompareExchange: @@ -3365,6 +3390,7 @@ struct SPIRVEmitContext parent, inst, inst->getFullType(), inst->getOperand(0), memoryScope, memorySemanticsEqual, memorySemanticsUnequal, inst->getOperand(2), inst->getOperand(1)); + ensureAtomicCapability(inst, SpvOpAtomicCompareExchange); } break; case kIROp_AtomicAdd: diff --git a/source/slang/slang-emit.cpp b/source/slang/slang-emit.cpp index c9319a13b..2206d29cf 100644 --- a/source/slang/slang-emit.cpp +++ b/source/slang/slang-emit.cpp @@ -53,9 +53,7 @@ #include "slang-ir-lower-l-value-cast.h" #include "slang-ir-lower-reinterpret.h" #include "slang-ir-loop-unroll.h" -#include "slang-ir-legalize-extract-from-texture-access.h" #include "slang-ir-legalize-image-subscript.h" -#include "slang-ir-legalize-is-texture-access.h" #include "slang-ir-legalize-vector-types.h" #include "slang-ir-metadata.h" #include "slang-ir-optix-entry-point-uniforms.h" @@ -1058,9 +1056,6 @@ Result linkAndOptimizeIR( legalizeVectorTypes(irModule, sink); - // Legalize `__isTextureAccess` and related. 
- legalizeIsTextureAccess(irModule, sink); - // Once specialization and type legalization have been performed, // we should perform some of our basic optimization steps again, // to see if we can clean up any temporaries created by legalization. @@ -1335,8 +1330,6 @@ Result linkAndOptimizeIR( // Create aliases for all dynamic resource parameters. if(requiredLoweringPassSet.dynamicResource && isKhronosTarget(targetRequest)) legalizeDynamicResourcesForGLSL(codeGenContext, irModule); - - legalizeExtractFromTextureAccess(irModule); // Legalize `ImageSubscript` loads. switch (target) diff --git a/source/slang/slang-intrinsic-expand.cpp b/source/slang/slang-intrinsic-expand.cpp index 7cde70777..aabc193dd 100644 --- a/source/slang/slang-intrinsic-expand.cpp +++ b/source/slang/slang-intrinsic-expand.cpp @@ -653,112 +653,6 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor) } } break; - - case 'a': - { - // We have an operation that needs to lower to either - // `atomic*` or `imageAtomic*` for GLSL, depending on - // whether its first operand is a subscript into an - // array. This `$a` is the first `a` in `atomic`, - // so we will replace it accordingly. - // - // TODO: This distinction should be made earlier, - // with the front-end picking the right overload - // based on the "address space" of the argument. - - Index argIndex = 0; - SLANG_RELEASE_ASSERT(m_argCount > argIndex); - - auto arg = m_args[argIndex].get(); - if (arg->getOp() == kIROp_ImageSubscript) - { - m_writer->emit("imageA"); - } - else - { - m_writer->emit("a"); - } - } - break; - - case 'A': - { - // We have an operand that represents the destination - // of an atomic operation in GLSL, and it should - // be lowered based on whether it is an ordinary l-value, - // or an image subscript. In the image subscript case - // this operand will turn into multiple arguments - // to the `imageAtomic*` function. 
- // - - Index argIndex = 0; - SLANG_RELEASE_ASSERT(m_argCount > argIndex); - - auto arg = m_args[argIndex].get(); - if (arg->getOp() == kIROp_ImageSubscript) - { - if (m_emitter->getSourceLanguage() == SourceLanguage::GLSL) - { - // TODO: we don't handle the multisample - // case correctly here, where the last - // component of the image coordinate needs - // to be broken out into its own argument. - // - m_writer->emit("("); - m_emitter->emitOperand(arg->getOperand(0), getInfo(EmitOp::General)); - m_writer->emit("), "); - - // The coordinate argument will have been computed - // as a `vector<uint, N>` because that is how the - // HLSL image subscript operations are defined. - // In contrast, the GLSL `imageAtomic*` operations - // expect `vector<int, N>` coordinates, so we - // will hackily insert the conversion here as - // part of the intrinsic op. - // - auto coords = arg->getOperand(1); - auto coordsType = coords->getDataType(); - - auto coordsVecType = as<IRVectorType>(coordsType); - IRIntegerValue elementCount = 1; - if (coordsVecType) - { - coordsType = coordsVecType->getElementType(); - elementCount = getIntVal(coordsVecType->getElementCount()); - } - - SLANG_ASSERT(coordsType->getOp() == kIROp_UIntType); - - if (elementCount > 1) - { - m_writer->emit("ivec"); - m_writer->emit(elementCount); - } - else - { - m_writer->emit("int"); - } - - m_writer->emit("("); - m_emitter->emitOperand(arg->getOperand(1), getInfo(EmitOp::General)); - m_writer->emit(")"); - } - else - { - m_writer->emit("("); - m_emitter->emitOperand(arg, getInfo(EmitOp::General)); - m_writer->emit(")"); - } - } - else - { - m_writer->emit("("); - m_emitter->emitOperand(arg, getInfo(EmitOp::General)); - m_writer->emit(")"); - } - } - break; - case 'P': // Type-based prefix as used for CUDA and C++ targets { diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp b/source/slang/slang-ir-legalize-extract-from-texture-access.cpp deleted file mode 100644 index 
de1e244a8..000000000 --- a/source/slang/slang-ir-legalize-extract-from-texture-access.cpp +++ /dev/null @@ -1,136 +0,0 @@ -#include "slang-ir-legalize-extract-from-texture-access.h" - -#include "slang-ir.h" -#include "slang-ir-insts.h" -#include "slang-ir-util.h" -#include "slang-ir-clone.h" -#include "slang-ir-specialize-address-space.h" -#include "slang-parameter-binding.h" -#include "slang-ir-legalize-image-subscript.h" -#include "slang-ir-legalize-varying-params.h" -#include "slang-ir-simplify-cfg.h" - -namespace Slang -{ - void legalizeExtractTextureFromTextureAccess(IRBuilder& builder, IRInst* inst) - { - SLANG_ASSERT(inst); - - builder.setInsertBefore(inst); - IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); - SLANG_ASSERT(imageSubscript); - SLANG_ASSERT(imageSubscript->getImage()); - inst->replaceUsesWith(imageSubscript->getImage()); - inst->removeAndDeallocate(); - // Ensure we are done processing the imageSubscript before we remove it - if (!imageSubscript->hasUses()) - imageSubscript->removeAndDeallocate(); - } - - void legalizeExtractArrayCoordFromTextureAccess(IRBuilder& builder, IRInst* inst) - { - SLANG_ASSERT(inst); - - builder.setInsertBefore(inst); - IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); - SLANG_ASSERT(imageSubscript); - SLANG_ASSERT(imageSubscript->getImage()); - - auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType()); - IRInst* coord = imageSubscript->getCoord(); - if(image->isArray()) - { - // Extract final element which is 'ArrayCoord' - IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType()); - SLANG_ASSERT(coordType); - auto coordSize = getIRVectorElementSize(coordType); - - IRType* newArrayCoordType = coordType->getElementType(); - auto arrayCoordLocation = coordSize - 1; - List<UInt> swizzleIndicies = { (UInt)arrayCoordLocation }; - - coord = builder.emitSwizzle(newArrayCoordType, coord, 1, 
swizzleIndicies.getBuffer()); - } - else - coord = builder.getIntValue(builder.getUIntType(), 0); - - - inst->replaceUsesWith(coord); - inst->removeAndDeallocate(); - // Ensure we are done processing the imageSubscript completly before we remove it - if (!imageSubscript->hasUses()) - imageSubscript->removeAndDeallocate(); - } - - void legalizeExtractCoordFromTextureAccess(IRBuilder& builder, IRInst* inst) - { - SLANG_ASSERT(inst); - - builder.setInsertBefore(inst); - IRImageSubscript* imageSubscript = as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); - SLANG_ASSERT(imageSubscript); - SLANG_ASSERT(imageSubscript->getImage()); - - auto image = as<IRTextureType>(imageSubscript->getImage()->getDataType()); - IRInst* coord = imageSubscript->getCoord(); - if(image->isArray()) - { - // Extract all but final element which is 'ArrayCoord' - IRVectorType* coordType = as<IRVectorType>(imageSubscript->getCoord()->getDataType()); - auto coordSize = getIRVectorElementSize(coordType); - SLANG_ASSERT(coordType); - - IRType* newCoordType = nullptr; - auto newCoordSize = coordSize - 1; - if(newCoordSize != 1) - newCoordType = builder.getVectorType(coordType->getElementType(), newCoordSize); - else - newCoordType = coordType->getElementType(); - List<UInt> swizzleIndicies = {1, 2, 3, 4}; - - coord = builder.emitSwizzle(newCoordType, coord, newCoordSize, swizzleIndicies.getBuffer()); - } - - inst->replaceUsesWith(coord); - inst->removeAndDeallocate(); - // Ensure we are done processing the imageSubscript completly before we remove it - if (!imageSubscript->hasUses()) - imageSubscript->removeAndDeallocate(); - } - - void legalizeExtractFromTextureAccess(IRModule* module) - { - IRBuilder builder(module); - for (auto globalInst : module->getModuleInst()->getChildren()) - { - auto func = as<IRFunc>(globalInst); - if (!func) - continue; - for (auto block : func->getBlocks()) - { - auto inst = block->getFirstInst(); - IRInst* next; - for ( ; inst; inst = next) - { - next = 
inst->getNextInst(); - switch (inst->getOp()) - { - case kIROp_ExtractArrayCoordFromTextureAccess: - if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) - legalizeExtractArrayCoordFromTextureAccess(builder, inst); - continue; - case kIROp_ExtractCoordFromTextureAccess: - if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) - legalizeExtractCoordFromTextureAccess(builder, inst); - continue; - case kIROp_ExtractTextureFromTextureAccess: - if (as<IRImageSubscript>(getRootAddr(inst->getOperand(0)))) - legalizeExtractTextureFromTextureAccess(builder, inst); - continue; - } - } - } - } - } -} - diff --git a/source/slang/slang-ir-legalize-extract-from-texture-access.h b/source/slang/slang-ir-legalize-extract-from-texture-access.h deleted file mode 100644 index 016c86def..000000000 --- a/source/slang/slang-ir-legalize-extract-from-texture-access.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "slang-ir.h" -#include "slang-compiler.h" - -namespace Slang -{ - class DiagnosticSink; - - void legalizeExtractFromTextureAccess(IRModule* module); -} diff --git a/source/slang/slang-ir-legalize-is-texture-access.cpp b/source/slang/slang-ir-legalize-is-texture-access.cpp deleted file mode 100644 index b9a0a7772..000000000 --- a/source/slang/slang-ir-legalize-is-texture-access.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include "slang-ir-legalize-is-texture-access.h" - -#include "slang-ir.h" -#include "slang-ir-insts.h" -#include "slang-ir-util.h" -#include "slang-ir-clone.h" -#include "slang-ir-specialize-address-space.h" -#include "slang-parameter-binding.h" -#include "slang-ir-legalize-image-subscript.h" -#include "slang-ir-legalize-varying-params.h" -#include "slang-ir-sccp.h" - -namespace Slang -{ - IRImageSubscript* getTextureAccess(IRInst* inst) - { - return as<IRImageSubscript>(getRootAddr(inst->getOperand(0))); - } - - void legalizeIsTextureAccess(IRModule* module, DiagnosticSink* sink) - { - HashSet<IRFunc*> functionsToSCCP; - IRBuilder builder(module); - 
for (auto globalInst : module->getModuleInst()->getChildren()) - { - auto func = as<IRFunc>(globalInst); - if (!func) - continue; - for (auto block : func->getBlocks()) - { - auto inst = block->getFirstInst(); - IRInst* next; - for ( ; inst; inst = next) - { - next = inst->getNextInst(); - switch (inst->getOp()) - { - case kIROp_IsTextureAccess: - if (getTextureAccess(inst)) - inst->replaceUsesWith(builder.getBoolValue(true)); - else - inst->replaceUsesWith(builder.getBoolValue(false)); - inst->removeAndDeallocate(); - functionsToSCCP.add(func); - continue; - case kIROp_IsTextureArrayAccess: - { - auto textureAccess = getTextureAccess(inst); - if (textureAccess && as<IRTextureType>(textureAccess->getImage()->getDataType())->isArray()) - inst->replaceUsesWith(builder.getBoolValue(true)); - else - inst->replaceUsesWith(builder.getBoolValue(false)); - inst->removeAndDeallocate(); - functionsToSCCP.add(func); - continue; - } - case kIROp_IsTextureScalarAccess: - { - auto textureAccess = getTextureAccess(inst); - if (textureAccess && !as<IRVectorType>(as<IRTextureType>(textureAccess->getImage()->getDataType())->getElementType())) - inst->replaceUsesWith(builder.getBoolValue(true)); - else - inst->replaceUsesWith(builder.getBoolValue(false)); - inst->removeAndDeallocate(); - functionsToSCCP.add(func); - continue; - } - } - } - } - } - // Requires a SCCP to ensure Slang does not evaluate 'IRTextureType' code path - // and unresolved 'isTextureAccess' operations for when 'inst' is not a - // 'IRTextureType'/`TextureAccessor` - for (auto func : functionsToSCCP) - applySparseConditionalConstantPropagation(func, sink); - } -} - diff --git a/source/slang/slang-ir-legalize-is-texture-access.h b/source/slang/slang-ir-legalize-is-texture-access.h deleted file mode 100644 index 9b9e1cca0..000000000 --- a/source/slang/slang-ir-legalize-is-texture-access.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "slang-ir.h" -#include "slang-compiler.h" - -namespace Slang -{ - class 
DiagnosticSink; - - void legalizeIsTextureAccess(IRModule* module, DiagnosticSink* sink); -} diff --git a/source/slang/slang-ir-use-uninitialized-values.cpp b/source/slang/slang-ir-use-uninitialized-values.cpp index 98fd9841a..fea55de8d 100644 --- a/source/slang/slang-ir-use-uninitialized-values.cpp +++ b/source/slang/slang-ir-use-uninitialized-values.cpp @@ -315,8 +315,11 @@ namespace Slang case kIROp_Unmodified: return Store; - // ... and the rest will load/use them default: + // Default case is that if the instruction is a pointer, it + // is considered a store, otherwise a load. + if (as<IRPtrTypeBase>(user->getDataType())) + return Store; return Load; } } diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index d0dcfd4fb..e0998779a 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -5092,7 +5092,7 @@ namespace Slang auto inst = createInst<IRAtomicStore>( this, kIROp_AtomicStore, - nullptr, + getVoidType(), dstPtr, srcVal, memoryOrder); diff --git a/tests/bugs/gh-3997.slang b/tests/bugs/gh-3997.slang index 8c75da426..d42e65e39 100644 --- a/tests/bugs/gh-3997.slang +++ b/tests/bugs/gh-3997.slang @@ -10,7 +10,7 @@ float atomicAdd(__ref float value, float amount) __requirePrelude("#include <atomic>"); __intrinsic_asm "std::atomic_ref(*$0).fetch_add($1)"; case spirv: - return __atomicAdd(value, amount); + return __atomic_add(value, amount); } } diff --git a/tests/compute/atomics-invalid-dest-type.slang b/tests/compute/atomics-invalid-dest-type.slang index 864debaee..5ae03a5c7 100644 --- a/tests/compute/atomics-invalid-dest-type.slang +++ b/tests/compute/atomics-invalid-dest-type.slang @@ -1,11 +1,8 @@ // atomics-buffer.slang -//TEST:SIMPLE(filecheck=CHECK): -target spirv -stage compute -entry computeMain -//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage compute -entry computeMain -//TEST:SIMPLE(filecheck=CHECK): -target glsl -stage compute -entry computeMain //TEST:SIMPLE(filecheck=CHECK): -target metal -stage compute 
-entry computeMain -//CHECK: Atomic must be applied to a scalar texture or non-texture +//CHECK: atomic operation on non-scalar texture RWBuffer<uint2> outputBuffer; diff --git a/tests/compute/nonuniformres-atomic.slang b/tests/compute/nonuniformres-atomic.slang index 95ae502dc..10dd30cb0 100644 --- a/tests/compute/nonuniformres-atomic.slang +++ b/tests/compute/nonuniformres-atomic.slang @@ -9,7 +9,7 @@ RWTexture2D<uint> texArray[2]; void main( uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID ) { - // CHECK0: imageAtomicAdd((texArray_{{.*}}[nonuniformEXT({{.*}})] + // CHECK0: {{.*}}imageAtomicAdd(texArray_{{.*}}[nonuniformEXT({{.*}})] // CHECK1: InterlockedAdd(texArray_{{.*}}[NonUniformResourceIndex({{.*}})] diff --git a/tests/hlsl-intrinsic/texture/float-atomics.slang b/tests/hlsl-intrinsic/texture/float-atomics.slang index 02cb5570c..913380416 100644 --- a/tests/hlsl-intrinsic/texture/float-atomics.slang +++ b/tests/hlsl-intrinsic/texture/float-atomics.slang @@ -24,6 +24,6 @@ void computeMain(uint3 tid : SV_DispatchThreadID) AllMemoryBarrier(); // CHECK: 4.0 - outputBuffer[0] = t[uint2(1, 0)]; + outputBuffer[0] = t[uint2(1, 0)] + originalValue; } diff --git a/tests/metal/atomic-byteaddressbuffer.slang b/tests/metal/atomic-byteaddressbuffer.slang new file mode 100644 index 000000000..677f80dbf --- /dev/null +++ b/tests/metal/atomic-byteaddressbuffer.slang @@ -0,0 +1,57 @@ +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -compute -shaderobj -output-using-type +//TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-cuda -compute -shaderobj -output-using-type +//TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL + +//TEST_INPUT:ubuffer(data=[0 0 0 0 0]):name=uintBuffer +RWByteAddressBuffer uintBuffer; + +//TEST_INPUT: ubuffer(data=[0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 ], stride=4):out,name outputBuffer +RWStructuredBuffer<float> outputBuffer; + +[numthreads(1,1,1)] +void computeMain() +{ + uintBuffer.InterlockedAdd(0, 1); + int oldValue; + //LIB: call {{.*}}.atomic.global.add.u.i32 + uintBuffer.InterlockedAdd(0, 1, oldValue); + // CHK: 1 + outputBuffer[0] = oldValue; + + uintBuffer.InterlockedAdd(0, 1, oldValue); + // CHK: 2 + outputBuffer[1] = (int)oldValue; + + uintBuffer.InterlockedCompareExchange(0, 3, 4, oldValue); + // CHK: 3 + outputBuffer[2] = (int)oldValue; + + uintBuffer.InterlockedOr(0, 3, oldValue); + // CHK: 4 + outputBuffer[3] = oldValue; // 4 + + uintBuffer.InterlockedExchange(0, 4, oldValue); + // CHK: 7 + outputBuffer[4] = oldValue; // 7 + + uintBuffer.InterlockedMin(0, 3, oldValue); + // CHK: 4 + outputBuffer[5] = oldValue; // 4 + + uintBuffer.InterlockedMax(0, 4, oldValue); + // CHK: 3 + outputBuffer[6] = oldValue; // 3 + + uintBuffer.InterlockedAnd(0, 7, oldValue); + // CHK: 4 + outputBuffer[7] = oldValue; // 4 + + uintBuffer.InterlockedXor(0, 7, oldValue); + // CHK: 4 + outputBuffer[8] = oldValue; // 4 + + // CHK: 3 + outputBuffer[9] = uintBuffer.Load(0); + +}
\ No newline at end of file diff --git a/tests/metal/atomic-intrinsics.slang b/tests/metal/atomic-intrinsics.slang index 5d47db913..afa0e5365 100644 --- a/tests/metal/atomic-intrinsics.slang +++ b/tests/metal/atomic-intrinsics.slang @@ -1,8 +1,7 @@ //TEST:SIMPLE(filecheck=MTL):-target metal -entry computeMain -stage compute -DMETAL //TEST:SIMPLE(filecheck=LIB):-target metallib -entry computeMain -stage compute -DMETAL //TEST(compute):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj -output-using-type -//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-directly -compute -shaderobj -output-using-type -//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -emit-spirv-via-glsl -compute -shaderobj -output-using-type +//TEST(compute, vulkan):COMPARE_COMPUTE_EX(filecheck-buffer=CHK):-vk -compute -shaderobj -output-using-type //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -output-using-type //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj -output-using-type @@ -36,22 +35,22 @@ void computeMain(uint groupIndex : SV_GroupIndex) float val = 0.0f; // InterlockedAdd - //MTL: atomic_uint threadgroup* {{.*}}shareMemUI + //MTL: atomic_uint threadgroup*{{.*}}shareMemUI //LIB: call {{.*}}.atomic.local.add.u.i32 InterlockedAdd(shareMemUI[idx], uint(1)); val += shareMemUI[idx]; - //MTL: atomic_int threadgroup* {{.*}}shareMemI + //MTL: atomic_int threadgroup*{{.*}}shareMemI //LIB: call {{.*}}.atomic.local.add.s.i32 InterlockedAdd(shareMemI[idx], 2); val += shareMemI[idx]; - //MTL: atomic_uint device* {{.*}}uintBuffer + //MTL: atomic_uint device*{{.*}}uintBuffer //LIB: call {{.*}}.atomic.global.add.u.i32 InterlockedAdd(uintBuffer[idx], 1); val += uintBuffer[idx]; - //MTL: atomic_int device* {{.*}}intBuffer + //MTL: atomic_int device*{{.*}}intBuffer //LIB: call {{.*}}.atomic.global.add.s.i32 InterlockedAdd(intBuffer[idx], 2); val += intBuffer[idx]; diff 
--git a/tests/metal/atomic-texture-buffer.slang b/tests/metal/atomic-texture-buffer.slang index 3e4eda94b..1db156364 100644 --- a/tests/metal/atomic-texture-buffer.slang +++ b/tests/metal/atomic-texture-buffer.slang @@ -2,7 +2,7 @@ //TEST:SIMPLE(filecheck=METAL_FLOAT): -target metal -stage compute -entry computeMain -DFLOAT //TEST:SIMPLE(filecheck=METALLIB): -target metallib -stage compute -entry computeMain -// METAL_FLOAT: 'float' atomic texture operations are disallowed with Metal target's +// METAL_FLOAT: floating point atomic operation //METALLIB: @computeMain diff --git a/tests/slang-extension/atomic-int64-byte-address-buffer.slang b/tests/slang-extension/atomic-int64-byte-address-buffer.slang index 9a7ae3b61..61e38069d 100644 --- a/tests/slang-extension/atomic-int64-byte-address-buffer.slang +++ b/tests/slang-extension/atomic-int64-byte-address-buffer.slang @@ -5,7 +5,7 @@ // No support for int64_t on fxc - we need SM6.0 and dxil // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj diff --git a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang index 4ab67df8e..2fce9788a 100644 --- a/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang +++ b/tests/slang-extension/atomic-min-max-u64-byte-address-buffer.slang @@ -5,7 +5,7 @@ // No support for int64_t on fxc - we need 
SM6.0 and dxil // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj // For some reason this doesn't work correctly on CUDA? That it behaves as if always does Min. Min and Max do appropriate // things tho, because if I force the condition I do get the right answer diff --git a/tests/slang-extension/cas-int64-byte-address-buffer.slang b/tests/slang-extension/cas-int64-byte-address-buffer.slang index 873f6ab4b..2d3189215 100644 --- a/tests/slang-extension/cas-int64-byte-address-buffer.slang +++ b/tests/slang-extension/cas-int64-byte-address-buffer.slang @@ -5,7 +5,7 @@ // No support for int64_t on fxc - we need SM6.0 and dxil // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj diff --git a/tests/slang-extension/exchange-int64-byte-address-buffer.slang b/tests/slang-extension/exchange-int64-byte-address-buffer.slang index 
84654ab80..a6c1277ac 100644 --- a/tests/slang-extension/exchange-int64-byte-address-buffer.slang +++ b/tests/slang-extension/exchange-int64-byte-address-buffer.slang @@ -2,10 +2,7 @@ //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj // No support for int64_t on DX11 //DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -// No support for int64_t on fxc - we need SM6.0 and dxil -// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12 -//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -nvapi-slot u0 -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -nvapi-slot u0 -compile-arg -O2 -shaderobj +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -render-features atomic-int64 -compile-arg -O2 -shaderobj //TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -render-features atomic-int64 -shaderobj //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj diff --git a/tests/spirv/ref-this.slang b/tests/spirv/ref-this.slang index 5eaa7f3a1..de4263975 100644 --- a/tests/spirv/ref-this.slang +++ b/tests/spirv/ref-this.slang @@ -1,7 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK): -target spirv // CHECK: %[[PTR:[0-9a-zA-Z_]+]] = OpAccessChain %_ptr_PhysicalStorageBuffer_uint %{{.*}} %int_0 -// CHECK: %original = OpAtomicIAdd %uint %[[PTR]] %uint_1 %uint_0 %uint_1 +// CHECK: %{{.*}} = OpAtomicIAdd %uint %[[PTR]] %uint_1 %uint_0 %uint_1 struct Buf { |
